From ed32181e595d27387badcd50e4251ffa299c201a Mon Sep 17 00:00:00 2001 From: Massimo Gengarelli Date: Thu, 4 Jul 2024 18:15:51 +0200 Subject: [PATCH] feat: implement configurable behavior for the nswatcher --- README.md | 39 +++-- cmd/chaosmonkey/main.go | 13 +- internal/configuration/behavior.go | 21 +++ internal/configuration/behavior_test.go | 58 +++++++ internal/configuration/logruslevel.go | 61 +++++++ .../{types_test.go => logruslevel_test.go} | 41 +---- internal/configuration/types.go | 98 +++-------- internal/watcher/namespace.go | 57 ++++++- internal/watcher/namespace_test.go | 152 ++++++++++++++++-- internal/watcher/types.go | 3 +- main.tf | 9 +- tests/kubetest.sh | 96 +++++++++++ 12 files changed, 508 insertions(+), 140 deletions(-) create mode 100644 internal/configuration/behavior.go create mode 100644 internal/configuration/behavior_test.go create mode 100644 internal/configuration/logruslevel.go rename internal/configuration/{types_test.go => logruslevel_test.go} (66%) diff --git a/README.md b/README.md index 8d0a7fc..5c51ad7 100644 --- a/README.md +++ b/README.md @@ -36,8 +36,10 @@ Basically, it spawns a new [goroutine](https://go.dev/tour/concurrency/1) with a [CRD Watcher](#crd-watcher) every time a new namespace is detected and it stops the corresponding goroutine when a namespace is deleted. -In the future, there will be the possibility to blacklist (or whitelist) some namespaces -depending on an annotation. +The Namespace can be [configured](#configuration) to either monitor all namespaces by +default (with an opt-out strategy) or to monitor only the namespaces which contain the +label `cm.massix.github.io/namespace="true"`. Check the [Configuration](#configuration) +paragraph for more details. ### CRD Watcher We make use of a @@ -178,13 +180,32 @@ spec: ``` ## Configuration -The only configuration possible for the ChaosMonkey is setting the minimum log level, -this is done by setting the environment variable `CHAOSMONKEY_LOGLEVEL` to one of the -following values: `trace`, `debug`, `info`, `warn`, `error`, `critical` or `panic`. - -The value is not case-sensitive. - -Invalid or empty values will make ChaosMonkey default to the `info` level. +There are two configurable parts of the ChaosMonkey (on top of what the [CRD](./crds/chaosmonkey-configuration.yaml) +already permits of course). + +**Minimum Log Level**: this is configurable using the environment variable `CHAOSMONKEY_LOGLEVEL`, +it accepts the following self explaining values: `trace`, `debug`, `info`, `warn`, `error`, +`critical` or `panic` and it sets the minimum log level for all the logging of the ChaosMonkey. + +The value is not case-sensitive, invalid or empty values will make ChaosMonkey default to +the `info` level. + +**Default Behavior**: this is used to configure the way the [Namespace Watcher](#namespace-watcher) should +behave in regards of additions and modifications of namespaces and it uses the environment +variable `CHAOSMONKEY_BEHAVIOR`. It currently accepts two values: `AllowAll` or `DenyAll` +(not case sensitive). + +Setting it to `AllowAll` means that by default all namespaces are monitored, if +you want to opt-out a namespace you **must** create a new label in the +metadata of the namespace: `cm.massix.github.io/namespace="false"`, this will +make the Watcher ignore that namespace. All values which are not the string +`false` will cause the Watcher to take that namespace into account. + +Setting it to `DenyAll` means that by default all namespaces are ignored, if +you want to opt-in a namespace you **must** create a new label in +the metadata of the namespace: `cm.massix.github.io/namespace="true"`, this will +make the Watcher take that namespace into account. All values which are not +the string `true` will cause the Watcher to ignore that namespace. ## Observability The Chaos Monkey exposes some metrics using the [Prometheus](https://prometheus.io/) library and format. diff --git a/cmd/chaosmonkey/main.go b/cmd/chaosmonkey/main.go index d490a20..8bdc746 100644 --- a/cmd/chaosmonkey/main.go +++ b/cmd/chaosmonkey/main.go @@ -23,7 +23,7 @@ func main() { log := logrus.WithFields(logrus.Fields{"component": "main"}) // Get the LogLevel from the environment variable - ll, err := configuration.FromEnvironment() + ll, err := configuration.LogrusLevelFromEnvironment() if err != nil { log.Warnf("No loglevel provided, using default: %s", logrus.GetLevel()) } else { @@ -48,10 +48,19 @@ func main() { panic(err) } + log.Info("Configuring default behavior via environment variable") + behavior, err := configuration.BehaviorFromEnvironment() + if err != nil { + log.Warnf("Error while configuring default behavior: %s", err) + + behavior = configuration.AllowAll + log.Warnf("Using default behavior: %s", behavior) + } + clientset := kubernetes.NewForConfigOrDie(cfg) cmcClientset := versioned.NewForConfigOrDie(cfg) - nsWatcher := watcher.DefaultNamespaceFactory(clientset, cmcClientset, nil, namespace) + nsWatcher := watcher.DefaultNamespaceFactory(clientset, cmcClientset, nil, namespace, behavior) // Hook signals s := make(chan os.Signal, 1) diff --git a/internal/configuration/behavior.go b/internal/configuration/behavior.go new file mode 100644 index 0000000..4e4f700 --- /dev/null +++ b/internal/configuration/behavior.go @@ -0,0 +1,21 @@ +package configuration + +import ( + "errors" + "os" + "strings" +) + +func BehaviorFromEnvironment() (Behavior, error) { + if val, ok := os.LookupEnv("CHAOSMONKEY_BEHAVIOR"); ok { + val = strings.ToUpper(val) + + if val == string(AllowAll) || val == string(DenyAll) { + return Behavior(val), nil + } else { + return "", &InvalidBehavior{providedBehaviour: val} + } + } + + return "", errors.New("No environment variable provided") +} diff --git a/internal/configuration/behavior_test.go b/internal/configuration/behavior_test.go new file mode 100644 index 0000000..302581f --- /dev/null +++ b/internal/configuration/behavior_test.go @@ -0,0 +1,58 @@ +package configuration_test + +import ( + "fmt" + "strings" + "testing" + + "github.com/massix/chaos-monkey/internal/configuration" +) + +func TestBehavior_ParseFromEnvironment(t *testing.T) { + goodEnvValues := map[string]configuration.Behavior{ + "allowall": configuration.AllowAll, + "ALLOWALL": configuration.AllowAll, + "AllOwAll": configuration.AllowAll, + "denyall": configuration.DenyAll, + "DenyAll": configuration.DenyAll, + "DENYALL": configuration.DenyAll, + } + + for key, val := range goodEnvValues { + t.Run(fmt.Sprintf("Can parse from environment (%s)", key), func(t *testing.T) { + t.Setenv("CHAOSMONKEY_BEHAVIOR", key) + + if b, err := configuration.BehaviorFromEnvironment(); err != nil { + t.Error(err) + } else if b != val { + t.Errorf("Was expecting %s, got %s instead", val, b) + } + }) + } + + badEnvValues := []string{ + "", + "invalid", + "geckos", + } + + for _, val := range badEnvValues { + t.Run(fmt.Sprintf("It fails for invalid strings (%s)", val), func(t *testing.T) { + t.Setenv("CHAOSMONKEY_BEHAVIOR", val) + + if b, err := configuration.BehaviorFromEnvironment(); err == nil { + t.Errorf("Was expecting error, received %s instead", b) + } else if err.Error() != fmt.Sprintf("Invalid behaviour: %s", strings.ToUpper(val)) { + t.Error(err) + } + }) + } + + t.Run("It fails if there is no environment variable", func(t *testing.T) { + if b, err := configuration.BehaviorFromEnvironment(); err == nil { + t.Errorf("Was expecting error, received %s instead", b) + } else if err.Error() != "No environment variable provided" { + t.Error(err) + } + }) +} diff --git a/internal/configuration/logruslevel.go b/internal/configuration/logruslevel.go new file mode 100644 index 0000000..f29487c --- /dev/null +++ b/internal/configuration/logruslevel.go @@ -0,0 +1,61 @@ +package configuration + +import ( + "errors" + "os" + "slices" + "strings" + + "github.com/sirupsen/logrus" +) + +func LogrusLevelFromEnvironment() (LogrusLevel, error) { + if val, ok := os.LookupEnv("CHAOSMONKEY_LOGLEVEL"); ok { + if newLevel, err := NewLogrusLevel(strings.ToLower(val)); err == nil { + return newLevel, nil + } else { + return "", err + } + } + + return "", errors.New("No environment variable for configuring the log level found.") +} + +func NewLogrusLevel(level string) (LogrusLevel, error) { + validLevels := []string{ + "panic", + "fatal", + "error", + "warn", + "info", + "debug", + "trace", + } + + if slices.Contains(validLevels, level) { + return LogrusLevel(level), nil + } + + return "", &InvalidLogrusLevel{level} +} + +func (l LogrusLevel) LogrusLevel() logrus.Level { + switch l { + case "panic": + return logrus.PanicLevel + case "fatal": + return logrus.FatalLevel + case "error": + return logrus.ErrorLevel + case "warn": + return logrus.WarnLevel + case "info": + return logrus.InfoLevel + case "debug": + return logrus.DebugLevel + case "trace": + return logrus.TraceLevel + default: + return logrus.InfoLevel + } +} diff --git a/internal/configuration/types_test.go b/internal/configuration/logruslevel_test.go similarity index 66% rename from internal/configuration/types_test.go rename to internal/configuration/logruslevel_test.go index 032e553..ca6c12e 100644 --- a/internal/configuration/types_test.go +++ b/internal/configuration/logruslevel_test.go @@ -1,7 +1,6 @@ package configuration_test import ( - "encoding/json" "fmt" "strings" "testing" @@ -10,10 +9,6 @@ import ( "github.com/sirupsen/logrus" ) -type UnmarshalTest struct { - Level configuration.LogrusLevel `json:"level"` -} - func Test_LogrusLevel(t *testing.T) { t.Run("Can create a logrus level", func(t *testing.T) { t.Parallel() @@ -40,43 +35,14 @@ func Test_LogrusLevel(t *testing.T) { t.Fatal(err) } }) - - t.Run("Can unmarshal a logrus level", func(t *testing.T) { - t.Parallel() - - var unmarshalTest UnmarshalTest - err := json.Unmarshal([]byte(`{ "level": "trace" }`), &unmarshalTest) - if err != nil { - t.Fatal(err) - } - - if unmarshalTest.Level.LogrusLevel() != logrus.TraceLevel { - t.Fatal(unmarshalTest.Level.LogrusLevel()) - } - }) - - t.Run("Will fail if level is not valid", func(t *testing.T) { - t.Parallel() - - var unmarshalTest UnmarshalTest - err := json.Unmarshal([]byte(`{ "level": "invalid" }`), &unmarshalTest) - if err == nil { - t.Fatal("Was expecting error") - } - - if err.Error() != "Invalid logrus level: invalid" { - t.Fatal(err) - } - }) } func TestLogLevel_FromEnvironment(t *testing.T) { for _, level := range []string{"PANIC", "FaTaL", "eRROR", "WARN", "info", "debug", "trace"} { - t.Logf("Testing with loglevel: %s", level) t.Run(fmt.Sprintf("Can set loglevel from environment (%s)", level), func(t *testing.T) { t.Setenv("CHAOSMONKEY_LOGLEVEL", level) - ll, err := configuration.FromEnvironment() + ll, err := configuration.LogrusLevelFromEnvironment() if err != nil { t.Fatal(err) } @@ -88,11 +54,10 @@ func TestLogLevel_FromEnvironment(t *testing.T) { } for _, level := range []string{"", "invalid", "geckos"} { - t.Logf("Testing with loglevel: %s", level) t.Run(fmt.Sprintf("It fails for invalid strings (%s)", level), func(t *testing.T) { t.Setenv("CHAOSMONKEY_LOGLEVEL", level) - ll, err := configuration.FromEnvironment() + ll, err := configuration.LogrusLevelFromEnvironment() if err == nil || ll != "" { t.Fatalf("Was not expecting to succeed: %s", ll) } @@ -107,7 +72,7 @@ func TestLogLevel_FromEnvironment(t *testing.T) { } t.Run("It fails if there is no environment variable", func(t *testing.T) { - ll, err := configuration.FromEnvironment() + ll, err := configuration.LogrusLevelFromEnvironment() if err == nil || ll != "" { t.Fatal("Was not expecting to succeed") } diff --git a/internal/configuration/types.go b/internal/configuration/types.go index 00c3fb3..5a0b152 100644 --- a/internal/configuration/types.go +++ b/internal/configuration/types.go @@ -1,90 +1,42 @@ package configuration import ( - "encoding/json" - "errors" "fmt" - "os" - "slices" - "strings" +) - "github.com/sirupsen/logrus" +type ( + Behavior string + LogrusLevel string ) +// Label to look for in the NS to either allow or deny the namespace +const NamespaceLabel = "cm.massix.github.io/namespace" + +const ( + AllowAll Behavior = "ALLOWALL" + DenyAll Behavior = "DENYALL" +) + +// Common errors while parsing the configuration type InvalidLogrusLevel struct { providedLevel string } -func (i *InvalidLogrusLevel) Error() string { - return fmt.Sprintf("Invalid logrus level: %s", i.providedLevel) -} - -type LogrusLevel string - -func FromEnvironment() (LogrusLevel, error) { - if val, ok := os.LookupEnv("CHAOSMONKEY_LOGLEVEL"); ok { - if newLevel, err := NewLogrusLevel(strings.ToLower(val)); err == nil { - return newLevel, nil - } else { - return "", err - } - } - - return "", errors.New("No environment variable for configuring the log level found.") +type InvalidBehavior struct { + providedBehaviour string } -func NewLogrusLevel(level string) (LogrusLevel, error) { - validLevels := []string{ - "panic", - "fatal", - "error", - "warn", - "info", - "debug", - "trace", - } - - if slices.Contains(validLevels, level) { - return LogrusLevel(level), nil - } - - return "", &InvalidLogrusLevel{level} -} +var ( + _ = (error)((*InvalidLogrusLevel)(nil)) + _ = (error)((*InvalidBehavior)(nil)) +) -func (l LogrusLevel) LogrusLevel() logrus.Level { - switch l { - case "panic": - return logrus.PanicLevel - case "fatal": - return logrus.FatalLevel - case "error": - return logrus.ErrorLevel - case "warn": - return logrus.WarnLevel - case "info": - return logrus.InfoLevel - case "debug": - return logrus.DebugLevel - case "trace": - return logrus.TraceLevel - default: - return logrus.InfoLevel - } +// Error implements error. +func (i *InvalidLogrusLevel) Error() string { + return fmt.Sprintf("Invalid logrus level: %s", i.providedLevel) } -func (l *LogrusLevel) UnmarshalJSON(b []byte) error { - var level string - err := json.Unmarshal(b, &level) - if err != nil { - return err - } - - newVal, err := NewLogrusLevel(level) - if err != nil { - return err - } - - *l = newVal - - return nil +// Error implements error. +func (i *InvalidBehavior) Error() string { + return fmt.Sprintf("Invalid behaviour: %s", i.providedBehaviour) } diff --git a/internal/watcher/namespace.go b/internal/watcher/namespace.go index 3a3e7bb..c2b62eb 100644 --- a/internal/watcher/namespace.go +++ b/internal/watcher/namespace.go @@ -8,6 +8,7 @@ import ( "time" mc "github.com/massix/chaos-monkey/internal/apis/clientset/versioned" + "github.com/massix/chaos-monkey/internal/configuration" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" "github.com/sirupsen/logrus" @@ -23,13 +24,15 @@ import ( type NamespaceWatcher struct { typedcorev1.NamespaceInterface record.EventRecorderLogger + Logrus logrus.FieldLogger Client kubernetes.Interface CmcClient mc.Interface - CrdWatchers map[string]Watcher Mutex *sync.Mutex + CrdWatchers map[string]Watcher metrics *nwMetrics RootNamespace string + Behavior configuration.Behavior CleanupTimeout time.Duration WatcherTimeout time.Duration Running bool @@ -132,7 +135,7 @@ func newNwMetrics(rootNamespace string) *nwMetrics { } } -func NewNamespaceWatcher(clientset kubernetes.Interface, cmcClientset mc.Interface, recorder record.EventRecorderLogger, rootNamespace string) Watcher { +func NewNamespaceWatcher(clientset kubernetes.Interface, cmcClientset mc.Interface, recorder record.EventRecorderLogger, rootNamespace string, behavior configuration.Behavior) Watcher { logrus.Infof("Creating new namespace watcher for namespace %s", rootNamespace) if clientset == nil { @@ -156,6 +159,7 @@ func NewNamespaceWatcher(clientset kubernetes.Interface, cmcClientset mc.Interfa metrics: newNwMetrics(rootNamespace), CleanupTimeout: 1 * time.Minute, RootNamespace: rootNamespace, + Behavior: behavior, Running: false, Client: clientset, CmcClient: cmcClientset, @@ -171,6 +175,33 @@ func (n *NamespaceWatcher) IsRunning() bool { return n.Running } +func (n *NamespaceWatcher) IsNamespaceAllowed(namespace *corev1.Namespace) bool { + label, ok := namespace.ObjectMeta.Labels[configuration.NamespaceLabel] + + // We allow all and there is no label + if n.Behavior == configuration.AllowAll && !ok { + return true + } + + // We deny all and there is no label + if n.Behavior == configuration.DenyAll && !ok { + return false + } + + // We deny all by default, the label is a whitelist (only if its value is "true") + if n.Behavior == configuration.DenyAll { + return label == "true" + } + + // We allow all by default, everything will let it through, except for "false" + if n.Behavior == configuration.AllowAll { + return label != "false" + } + + // We should never arrive here + return false +} + // Start implements Watcher. func (n *NamespaceWatcher) Start(ctx context.Context) error { var err error @@ -178,7 +209,7 @@ func (n *NamespaceWatcher) Start(ctx context.Context) error { defer n.Close() - n.Logrus.Infof("Starting watcher, timeout: %s", n.WatcherTimeout) + n.Logrus.Infof("Starting watcher, timeout: %s, behavior: %s", n.WatcherTimeout, n.Behavior) timeoutSeconds := int64(n.WatcherTimeout.Seconds()) w, err := n.Watch(ctx, v1.ListOptions{ @@ -220,6 +251,11 @@ func (n *NamespaceWatcher) Start(ctx context.Context) error { _ = n.Stop() case watch.Added: + if !n.IsNamespaceAllowed(ns) { + logrus.Infof("Not creating watcher for %s", ns.Name) + continue + } + n.Logrus.Infof("Adding watcher for namespace %s", ns.Name) if err := n.addWatcher(ns.Name); err != nil { logrus.Errorf("Error while trying to add CRD watcher: %s", err) @@ -231,6 +267,21 @@ func (n *NamespaceWatcher) Start(ctx context.Context) error { n.Eventf(ns, "Normal", "Added", "CRD Watcher added for %s", ns.Name) n.metrics.addedEvents.Inc() + case watch.Modified: + n.Logrus.Infof("Eventually modifying watcher for %s", ns.Name) + if n.IsNamespaceAllowed(ns) { + if err := n.addWatcher(ns.Name); err != nil { + n.Logrus.Warnf("Error while trying to add CRD watcher: %s", err) + } else { + n.Logrus.Infof("Starting newly created watcher for %s", ns.Name) + n.startCrdWatcher(ctx, ns.Name, &wg) + } + } else { + if err := n.removeWatcher(ns.Name); err != nil { + n.Logrus.Warnf("Error while trying to remove CRD watcher: %s", err) + } + } + case watch.Deleted: n.Logrus.Infof("Deleting watcher for namespace %s", ns.Name) if err := n.removeWatcher(ns.Name); err != nil { diff --git a/internal/watcher/namespace_test.go b/internal/watcher/namespace_test.go index 7f9bd5f..fe1a968 100644 --- a/internal/watcher/namespace_test.go +++ b/internal/watcher/namespace_test.go @@ -11,6 +11,7 @@ import ( typedcmc "github.com/massix/chaos-monkey/internal/apis/clientset/versioned" fakecmc "github.com/massix/chaos-monkey/internal/apis/clientset/versioned/fake" + "github.com/massix/chaos-monkey/internal/configuration" "github.com/massix/chaos-monkey/internal/watcher" "github.com/sirupsen/logrus" corev1 "k8s.io/api/core/v1" @@ -64,7 +65,7 @@ func (f *FakeCrdWatcher) Close() error { var cmcClientset = fakecmc.NewSimpleClientset() func TestNamespaceWatcher_Create(t *testing.T) { - w := watcher.DefaultNamespaceFactory(kubernetes.NewSimpleClientset(), cmcClientset, record.NewFakeRecorder(1024), "chaos-monkey") + w := watcher.DefaultNamespaceFactory(kubernetes.NewSimpleClientset(), cmcClientset, record.NewFakeRecorder(1024), "chaos-monkey", configuration.AllowAll) defer w.Close() if w.IsRunning() { @@ -75,7 +76,7 @@ func TestNamespaceWatcher_Create(t *testing.T) { func TestNamespaceWatcher_BasicBehaviour(t *testing.T) { logrus.SetLevel(logrus.DebugLevel) clientSet := kubernetes.NewSimpleClientset() - w := watcher.DefaultNamespaceFactory(clientSet, cmcClientset, record.NewFakeRecorder(1024), "chaos-monkey").(*watcher.NamespaceWatcher) + w := watcher.DefaultNamespaceFactory(clientSet, cmcClientset, record.NewFakeRecorder(1024), "chaos-monkey", configuration.AllowAll).(*watcher.NamespaceWatcher) w.CleanupTimeout = 1 * time.Second // Inject my CRD Factory @@ -125,11 +126,10 @@ func TestNamespaceWatcher_BasicBehaviour(t *testing.T) { } // Foreach watcher, we should have 1 Start and 0 Stop called - for ns, crd := range w.CrdWatchers { + for _, crd := range w.CrdWatchers { crd := crd.(*FakeCrdWatcher) crd.Mutex.Lock() - t.Logf("Watcher: %s, Start: %d, Stop: %d", ns, crd.StartedTimes, crd.StoppedTimes) if s := crd.StartedTimes; s != 1 { t.Errorf("Expected 1 Start, got %d", s) } @@ -142,7 +142,6 @@ func TestNamespaceWatcher_BasicBehaviour(t *testing.T) { w.Mutex.Unlock() // Now stop the watcher - t.Log("Stopping watcher") if err := w.Stop(); err != nil { t.Error(err) } @@ -151,7 +150,6 @@ func TestNamespaceWatcher_BasicBehaviour(t *testing.T) { t.Errorf("Watcher should not be running") } - t.Log("Waiting for the watcher to terminate") <-done // We should have 0 watchers remaining @@ -163,7 +161,7 @@ func TestNamespaceWatcher_BasicBehaviour(t *testing.T) { func TestNamespaceWatcher_Error(t *testing.T) { logrus.SetLevel(logrus.DebugLevel) clientset := kubernetes.NewSimpleClientset() - w := watcher.DefaultNamespaceFactory(clientset, cmcClientset, record.NewFakeRecorder(1024), "chaos-monkey") + w := watcher.DefaultNamespaceFactory(clientset, cmcClientset, record.NewFakeRecorder(1024), "chaos-monkey", configuration.AllowAll) clientset.PrependWatchReactor("namespaces", func(action ktest.Action) (handled bool, ret watch.Interface, err error) { fakeWatch := watch.NewFake() @@ -185,8 +183,6 @@ func TestNamespaceWatcher_Error(t *testing.T) { go func() { if err := w.Start(context.Background()); err == nil || !strings.Contains(err.Error(), "Empty event or error from namespace watcher") { t.Errorf("Expected error, got %+v instead", err) - } else { - t.Logf("Expected: %s", err) } done <- struct{}{} @@ -205,7 +201,7 @@ func TestNamespaceWatcher_Error(t *testing.T) { func TestNamespaceWatcher_Cleanup(t *testing.T) { logrus.SetLevel(logrus.DebugLevel) clientset := kubernetes.NewSimpleClientset() - w := watcher.DefaultNamespaceFactory(clientset, cmcClientset, record.NewFakeRecorder(1024), "chaos-monkey").(*watcher.NamespaceWatcher) + w := watcher.DefaultNamespaceFactory(clientset, cmcClientset, record.NewFakeRecorder(1024), "chaos-monkey", configuration.AllowAll).(*watcher.NamespaceWatcher) w.CleanupTimeout = 1 * time.Second // Add some fake watchers @@ -258,7 +254,7 @@ func TestNamespaceWatcher_Cleanup(t *testing.T) { func TestNamespaceWatcher_RestartWatcher(t *testing.T) { clientset := kubernetes.NewSimpleClientset() - w := watcher.DefaultNamespaceFactory(clientset, cmcClientset, record.NewFakeRecorder(1024), "chaos-monkey") + w := watcher.DefaultNamespaceFactory(clientset, cmcClientset, record.NewFakeRecorder(1024), "chaos-monkey", configuration.AllowAll) w.(*watcher.NamespaceWatcher).CleanupTimeout = 1 * time.Second timeAsked := &atomic.Int32{} timeAsked.Store(0) @@ -305,3 +301,137 @@ func TestNamespaceWatcher_RestartWatcher(t *testing.T) { <-done } + +func TestNamespaceWatcher_ModifyNamespace(t *testing.T) { + fakeWatch := watch.NewFake() + clientset := kubernetes.NewSimpleClientset() + + clientset.PrependWatchReactor("namespaces", func(action ktest.Action) (handled bool, ret watch.Interface, err error) { + return true, fakeWatch, nil + }) + + watcher.DefaultCrdFactory = func(clientset k.Interface, cmcClientset typedcmc.Interface, recorder record.EventRecorderLogger, namespace string) watcher.Watcher { + return &FakeCrdWatcher{Mutex: &sync.Mutex{}} + } + + nsWithLabel := func(name, label string) *corev1.Namespace { + lbl := map[string]string{ + "cm.massix.github.io/namespace": label, + } + + return &corev1.Namespace{ + ObjectMeta: v1.ObjectMeta{Name: name, Labels: lbl}, + } + } + + w := watcher.NewNamespaceWatcher(clientset, nil, record.NewFakeRecorder(1024), "chaosmonkey", configuration.AllowAll).(*watcher.NamespaceWatcher) + w.WatcherTimeout = 24 * time.Hour + w.CleanupTimeout = 300 * time.Millisecond + + done := make(chan interface{}) + defer close(done) + + go func() { + if err := w.Start(context.Background()); err != nil { + t.Error(err) + } + + done <- nil + }() + + checkWatchers := func(t *testing.T, num int) { + w.Mutex.Lock() + defer w.Mutex.Unlock() + + if cnt := len(w.CrdWatchers); cnt != num { + t.Errorf("Expected %d watchers, got %d", num, cnt) + } + } + + t.Run("AllowAll", func(t *testing.T) { + t.Run("ADD", func(t *testing.T) { + go func() { + // These should all pass + fakeWatch.Add(nsWithLabel("test-ok-1", "blabla")) + fakeWatch.Add(nsWithLabel("test-ok-2", "true")) + fakeWatch.Add(&corev1.Namespace{ObjectMeta: v1.ObjectMeta{Name: "test-ok-3"}}) + }() + + // We should have exactly 3 watchers registered + time.Sleep(300 * time.Millisecond) + checkWatchers(t, 3) + + go func() { + // These should all be rejected + fakeWatch.Add(nsWithLabel("test-ko-1", "false")) + fakeWatch.Add(nsWithLabel("test-ko-2", "false")) + fakeWatch.Add(nsWithLabel("test-ko-3", "false")) + }() + + time.Sleep(300 * time.Millisecond) + checkWatchers(t, 3) + }) + + t.Run("MODIFY", func(t *testing.T) { + go func() { + // Modifying an existing watcher should remove it from the list + fakeWatch.Modify(nsWithLabel("test-ok-1", "false")) + + // Modifying a non existing watcher should not panic + fakeWatch.Modify(nsWithLabel("test-notexisting", "false")) + }() + + time.Sleep(300 * time.Millisecond) + checkWatchers(t, 2) + }) + }) + + // Change the behavior to "DenyAll" and reset the watcher + w.Mutex.Lock() + w.Behavior = configuration.DenyAll + w.CrdWatchers = map[string]watcher.Watcher{} + w.Mutex.Unlock() + + t.Run("DenyAll", func(t *testing.T) { + t.Run("ADD", func(t *testing.T) { + go func() { + // These should all be rejected + fakeWatch.Add(nsWithLabel("test-ko-1", "blabla")) + fakeWatch.Add(nsWithLabel("test-ko-2", "false")) + fakeWatch.Add(&corev1.Namespace{ObjectMeta: v1.ObjectMeta{Name: "test-ko-3"}}) + }() + + time.Sleep(300 * time.Millisecond) + checkWatchers(t, 0) + + go func() { + // These should all be accepted + fakeWatch.Add(nsWithLabel("test-ok-1", "true")) + fakeWatch.Add(nsWithLabel("test-ok-2", "true")) + fakeWatch.Add(nsWithLabel("test-ok-3", "true")) + }() + + time.Sleep(300 * time.Millisecond) + checkWatchers(t, 3) + }) + + t.Run("MODIFY", func(t *testing.T) { + go func() { + // Modifying an existing watcher should remove it from the list + fakeWatch.Modify(nsWithLabel("test-ok-1", "blabla")) + fakeWatch.Modify(nsWithLabel("test-ok-2", "false")) + fakeWatch.Modify(&corev1.Namespace{ObjectMeta: v1.ObjectMeta{Name: "test-ok-3"}}) + + // Modifying a watcher which is not in the list should not panic + fakeWatch.Modify(nsWithLabel("test-notexisting", "false")) + fakeWatch.Modify(&corev1.Namespace{ObjectMeta: v1.ObjectMeta{Name: "test-notexisting-2"}}) + }() + + time.Sleep(300 * time.Millisecond) + checkWatchers(t, 0) + }) + }) + + _ = w.Stop() + <-done +} diff --git a/internal/watcher/types.go b/internal/watcher/types.go index e2c1b6f..548e7d2 100644 --- a/internal/watcher/types.go +++ b/internal/watcher/types.go @@ -6,6 +6,7 @@ import ( "time" mc "github.com/massix/chaos-monkey/internal/apis/clientset/versioned" + "github.com/massix/chaos-monkey/internal/configuration" appsv1 "k8s.io/api/apps/v1" "k8s.io/client-go/kubernetes" "k8s.io/client-go/tools/record" @@ -28,7 +29,7 @@ type ConfigurableWatcher interface { } type ( - NamespaceFactory func(clientset kubernetes.Interface, cmcClientset mc.Interface, recorder record.EventRecorderLogger, rootNamespace string) Watcher + NamespaceFactory func(clientset kubernetes.Interface, cmcClientset mc.Interface, recorder record.EventRecorderLogger, rootNamespace string, behavior configuration.Behavior) Watcher CrdFactory func(clientset kubernetes.Interface, cmcClientset mc.Interface, recorder record.EventRecorderLogger, namespace string) Watcher DeploymentFactory func(clientset kubernetes.Interface, recorder record.EventRecorderLogger, deployment *appsv1.Deployment) ConfigurableWatcher PodFactory func(clientset kubernetes.Interface, recorder record.EventRecorderLogger, namespace string, labelSelector ...string) ConfigurableWatcher diff --git a/main.tf b/main.tf index db16d95..0c9e50d 100644 --- a/main.tf +++ b/main.tf @@ -45,6 +45,8 @@ resource "kind_cluster" "default" { resource "docker_image" "chaos-monkey-image" { name = "chaos-monkey:dev" + keep_locally = false + build { context = path.module dockerfile = "Dockerfile" @@ -82,9 +84,6 @@ resource "kubernetes_namespace" "chaosmonkey" { resource "kubernetes_namespace" "target-namespace" { metadata { name = "target" - labels = { - "chaosmonkey.massix.github.io/enabled" = "enabled" - } } } @@ -247,6 +246,10 @@ resource "kubernetes_deployment" "chaos-monkey-deployment" { name = "CHAOSMONKEY_LOGLEVEL" value = "debug" } + env { + name = "CHAOSMONKEY_BEHAVIOR" + value = "AllowAll" + } port { container_port = 9000 name = "metrics" diff --git a/tests/kubetest.sh b/tests/kubetest.sh index 73d5f66..63fd88b 100755 --- a/tests/kubetest.sh +++ b/tests/kubetest.sh @@ -174,6 +174,26 @@ checkMetric() { fi } +checkAllPodsNoChanges() { + local counter=0 + local cpList + local newCpList + + cpList=$(${KUBECTL} get pods -n target -o jsonpath='{.items[*].metadata.name}') + newCpList=$(${KUBECTL} get pods -n target -o jsonpath='{.items[*].metadata.name}') + + while [[ $counter -lt 5 ]]; do + if [[ "${cpList}" == "${newCpList}" ]]; then + info "Pods did not change ($((5 - counter)) loops left)" + sleep 10 + else + err "Pods have changed" + fi + + counter=$((counter + 1)) + done +} + debug "Checking kubectl @ ${KUBECTL}" if [[ -z "${KUBECTL}" ]]; then err "Please install kubectl: https://kubernetes.io/docs/tasks/tools/install-kubectl/" @@ -370,4 +390,80 @@ done debug "Stopping port-forward" kill -15 ${PF_PID} +info "Check Behavior" + +info "Patching namespace to disable ChaosMonkey" +if ! ${KUBECTL} patch namespace target --type json --patch-file=/dev/stdin <<-JSONPATCH >/dev/null; then +[ +{ "op": "add", "path": "/metadata/labels", "value": {"cm.massix.github.io/namespace": "false"} }, +] +JSONPATCH + err "Could not patch namespace" +fi + +# Wait for the ChaosMonkey to terminate +sleep 5 + +info "Checking that chaosmonkey is disabled for target namespace" +checkAllPodsNoChanges + +info "Patching deployment to inject DenyAll" +if ! ${KUBECTL} patch -n chaosmonkey deploy chaos-monkey --type json --patch-file=/dev/stdin <<-JSONPATCH >/dev/null; then +[ + { "op": "replace", "path": "/spec/template/spec/containers/0/env/1/value", "value": "DenyAll" }, +] +JSONPATCH + err "Could not patch Deployment" +fi + +debug "Waiting for deployment to restart" +if ! ${KUBECTL} rollout -n chaosmonkey restart deployment chaos-monkey >/dev/null 2>/dev/null; then + err "Could not restart deployment" +fi + +if ! ${KUBECTL} rollout -n chaosmonkey status deployment chaos-monkey >/dev/null 2>/dev/null; then + err "Could not wait for successful rollout" +fi + +info "Checking that chaosmonkey is still disabled" +checkAllPodsNoChanges + +info "Patch the CMC configurations to their initial values" +if ! ${KUBECTL} -n target patch cmc chaosmonkey-${disruptScale} --type json --patch-file=/dev/stdin <<-JSONPATCH >/dev/null; then +[ + {"op": "replace", "path": "/spec/enabled", "value": true}, + {"op": "replace", "path": "/spec/podMode", "value": false}, + {"op": "replace", "path": "/spec/minReplicas", "value": 2}, + {"op": "replace", "path": "/spec/maxReplicas", "value": 4} +] +JSONPATCH + err "Could not patch CMC ${disruptScale}" +fi + +if ! ${KUBECTL} -n target patch cmc chaosmonkey-${disruptPods} --type json --patch-file=/dev/stdin <<-JSONPATCH >/dev/null; then +[ + {"op": "replace", "path": "/spec/enabled", "value": true}, + {"op": "replace", "path": "/spec/podMode", "value": true}, + {"op": "replace", "path": "/spec/minReplicas", "value": 0}, + {"op": "replace", "path": "/spec/maxReplicas", "value": 1} +] +JSONPATCH + err "Could not patch CMC ${disruptPods}" +fi + +info "Patch the namespace to enable it" +if ! ${KUBECTL} patch namespace target --type json --patch-file=/dev/stdin <<-JSONPATCH >/dev/null; then +[ +{ "op": "replace", "path": "/metadata/labels/cm.massix.github.io~1namespace", "value": "true" }, +] +JSONPATCH + err "Could not patch namespace" +fi + +info "Check that the pods are changing again" +checkPods "app=${disruptPods}" + +info "Check that the replicas are changing again" +checkReplicas "${disruptScale}" + info "All tests passed!"