Merge pull request chinmina#66 from jamestelfer/jamestelfer/issue12

feat: add audit logging to the request path
cultureamp · Oct 6, 2024 · 27cb64d · 27cb64d
2 parents 7b64842 + 36a3fd1
commit 27cb64d
Show file tree

Hide file tree

Showing 12 changed files with 688 additions and 51 deletions.
diff --git a/README.md b/README.md
@@ -145,6 +145,11 @@ To understand what's right for your organization, consider:
   - it's OK if your organization controls the creation/configuration of
     pipelines: this restricts the opportunity to misconfigure a pipeline.
 
+## Operations
+
+See the [observability documentation](./docs/observability.md) for more details
+on the information provided by the system when running.
+
 ## Configuration
 
 Requirements:

diff --git a/codecov.yml b/codecov.yml
@@ -1,6 +1,8 @@
 ignore:
   # testing utility
   - "cmd/create"
+  # has sufficient secondary coverage
+  - "internal/audit/response.go"
   # telemetry configuration
   - "internal/observe/telemetry.go"
   # bootstrap only

diff --git a/docs/observability.md b/docs/observability.md
@@ -0,0 +1,58 @@
+# Observability
+
+The system produces traces and metrics via Open Telemetry, and logs via zerolog.
+There are minimal informational logs, as well as per-request audit logs that are
+written to the process's `stdout`.
+
+## Audit logs
+
+Audit logs provide a level of non-repudiation for the system. These logs are
+written to the container's stdout, and cannot be disabled.
+
+> [!TIP]
+> Requests to non-existent routes do not form part of the audit log. Access logs
+> or firewall logs are better sources for this information.
+
+Each authenticated endpoint (both `/token` and `/git-credentials`) will record
+details about the request, the authorization process, and the GitHub token
+created. If an error occurs, this is also written out.
+
+At a technical level, logs are written to stdout using zerolog at the "audit"
+log level. Initial data is collected by request middleware and the partial entry
+is then accessible via the context. The context entry is further enriched with
+details by other components, including both the JWT middleware and the vendor.
+such that the log is fully completed by the end of the request.
+
+> [!IMPORTANT]
+> A panic in the request chain will still result in the audit log being written,
+> and the panic details will also be included.
+
+### Audit log fields
+
+1. Request data
+    - `Method`: the request method. This will currently be `GET` for all standard requests.
+    - `Path`: the requested path.
+    - `Status` is the HTTP response status of the request
+    - `SourceIP` is the client IP of the requestor
+    - `UserAgent` is the user agent reported by the client
+    - `Error` is the error produced by the request. This may come from internal
+      errors or panics, as well as the JWT validation and token creation
+      components.
+2. Authorization data
+    - `Authorized` is a boolean that is `true` when the request JWT is
+      successfully authorized by the service.
+    - `AuthSubject` is the contents of the `sub` field from the JWT
+    - `AuthIssuer` is the JWT `iss` field
+    - `AuthAudience` is the (possibly multiple) reported `aud` field values from
+      the JWT
+    - `AuthExpirySecs` is the JWT expiry time in seconds after the Unix epoch
+3. Token data
+    - `Repositories` is the set of repositories that the token allows access to
+    - `Permissions` is the set of GitHub token permissions assigned to the token
+    - `ExpirySecs` is the GitHub token expiry time in seconds after the Unix
+      epoch
+
+## Open Telemetry
+
+This section is a stub. For now, refer to the [`.envrc`](../.envrc) file for
+details of all Open Telemetry related configuration that's currently possible.
diff --git a/internal/audit/log.go b/internal/audit/log.go
@@ -0,0 +1,188 @@
+package audit
+
+import (
+	"context"
+	"fmt"
+	"net/http"
+	"time"
+
+	"github.com/rs/zerolog"
+)
+
+// marker for interface implementation
+var _ zerolog.LogObjectMarshaler = (*Entry)(nil)
+
+// marker for context key
+type key struct{}
+
+const (
+	// Level is the log level at which audit logs are written.
+	Level = zerolog.Level(20)
+)
+
+var (
+	// logKey is the key used to store the audit log entry in the context.
+	logKey = key{}
+)
+
+// Entry is an audit log entry for the current request.
+type Entry struct {
+	Method           string
+	Path             string
+	Status           int
+	SourceIP         string
+	UserAgent        string
+	RequestedProfile string
+	Authorized       bool
+	AuthSubject      string
+	AuthIssuer       string
+	AuthAudience     []string
+	AuthExpirySecs   int64
+	Error            string
+	Repositories     []string
+	Permissions      []string
+	ExpirySecs       int64
+}
+
+// MarshalZerologObject implements zerolog.LogObjectMarshaler. This avoids the
+// need for reflection when logging, at the cost of requiring maintenance when
+// the Entry struct changes.
+func (e *Entry) MarshalZerologObject(event *zerolog.Event) {
+	event.Str("method", e.Method).
+		Str("path", e.Path).
+		Int("status", e.Status).
+		Str("sourceIP", e.SourceIP).
+		Str("userAgent", e.UserAgent).
+		Str("requestedProfile", e.RequestedProfile).
+		Bool("authorized", e.Authorized).
+		Str("authSubject", e.AuthSubject).
+		Str("authIssuer", e.AuthIssuer).
+		Str("error", e.Error)
+
+	now := time.Now()
+	if e.AuthExpirySecs > 0 {
+		exp := time.Unix(e.AuthExpirySecs, 0)
+		remaining := exp.Sub(now).Round(time.Millisecond)
+		event.Time("authExpiry", exp)
+		event.Dur("authExpiryRemaining", remaining)
+	}
+
+	if e.ExpirySecs > 0 {
+		exp := time.Unix(e.ExpirySecs, 0)
+		remaining := exp.Sub(now).Round(time.Millisecond)
+		event.Time("expiry", exp)
+		event.Dur("expiryRemaining", remaining)
+	}
+
+	if len(e.AuthAudience) > 0 {
+		event.Strs("authAudience", e.AuthAudience)
+	}
+
+	if len(e.Repositories) > 0 {
+		event.Strs("repositories", e.Repositories)
+	}
+
+	if len(e.Permissions) > 0 {
+		event.Strs("permissions", e.Permissions)
+	}
+}
+
+// Begin sets up the audit log entry for the current request with details from the request.
+func (e *Entry) Begin(r *http.Request) {
+	e.Path = r.URL.Path
+	e.Method = r.Method
+	e.UserAgent = r.UserAgent()
+	e.SourceIP = r.RemoteAddr
+}
+
+// End writes the audit log entry. If the returned func is deferred, any panic
+// will be recovered so the log entry can be written before the panic is
+// re-raised.
+func (e *Entry) End(ctx context.Context) func() {
+	return func() {
+		// recover from panic if necessary
+		r := recover()
+		if r != nil {
+			// record the details of the panic, attempting to avoid overwriting an
+			// earlier error
+			e.Status = http.StatusInternalServerError
+			err := fmt.Sprintf("panic: %v", r)
+			if e.Error != "" {
+				e.Error += "; "
+			}
+			e.Error += err
+		}
+
+		// OK is the default if the status is not set when the response is written.
+		if e.Status == 0 {
+			e.Status = http.StatusOK
+		}
+
+		zerolog.Ctx(ctx).WithLevel(Level).EmbedObject(e).Str("type", "audit").Msg("audit_event")
+
+		if r != nil {
+			// repanic the panic
+			panic(r)
+		}
+	}
+}
+
+// Middleware is an HTTP middleware that creates a new audit log entry for the
+// current request and enriches it with information about the request. The log
+// entry is written to the log when the request is complete.
+//
+// A panic during the request will be recovered and logged as an error in the
+// audit entry. The HTTP status code of the response is also logged in the audit
+// entry; further details may be added by the application.
+func Middleware() func(next http.Handler) http.Handler {
+	zerologConfiguration()
+
+	return func(next http.Handler) http.Handler {
+		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			ctx, entry := Context(r.Context())
+
+			// wrap the response writer to capture the status code
+			response := wrapResponseWriter(w, Log(ctx))
+
+			entry.Begin(r)
+			defer entry.End(ctx)()
+
+			next.ServeHTTP(response, r.WithContext(ctx))
+		})
+	}
+}
+
+// Get the log entry for the current request. This is safe to use even if the
+// context does not create an entry.
+func Log(ctx context.Context) *Entry {
+	_, e := Context(ctx)
+	return e
+}
+
+// Context returns the Entry for the current request, creating one if it
+// does not exist. If the returned context is kept, the returned entry can be
+// further enriched. If not, information written to the entry will be lost.
+func Context(ctx context.Context) (context.Context, *Entry) {
+	e, ok := ctx.Value(logKey).(*Entry)
+	if !ok {
+		e = &Entry{}
+
+		ctx = context.WithValue(ctx, logKey, e)
+	}
+
+	return ctx, e
+}
+
+func zerologConfiguration() {
+	// configure the console writer
+	zerolog.FormattedLevels[Level] = "AUD"
+
+	// format the audit level as "audit", falling back to the default
+	marshal := zerolog.LevelFieldMarshalFunc
+	zerolog.LevelFieldMarshalFunc = func(l zerolog.Level) string {
+		if l == Level {
+			return "audit"
+		}
+		return marshal(l)
+	}
+}