Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

seccomp: emulate safe privileged system calls #61

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions arch.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/* Copyright © 2024 Arista Networks, Inc. All rights reserved.
*
* Use of this source code is governed by the MIT license that can be found
* in the LICENSE file.
*/

#ifndef ARCH_H_
# define ARCH_H_

/* config.h is expected to provide the ARCH macro that is expanded below. */
# include "config.h"

/* Two-level stringification: ARCH_STR() lets its argument be macro-expanded
 * first, then ARCH_STR_() turns the expanded tokens into a string literal. */
# define ARCH_STR_(x) #x
# define ARCH_STR(x) ARCH_STR_(x)

/* *INDENT-OFF* - formatters try to add spaces here */
# define ARCH_HEADER_BASE arch/ARCH
/* *INDENT-ON* */

/* Include the per-architecture syscall header; once ARCH has been expanded
 * the operand stringifies to "arch/<value of ARCH>/syscall.h". */
# include ARCH_STR(ARCH_HEADER_BASE/syscall.h)

#endif /* !ARCH_H_ */
89 changes: 89 additions & 0 deletions arch/aarch64/gen-syscall.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#!/bin/bash

# This script generates the classic BPF program to intercept system calls
# in AArch64 userspace.

# From asm/unistd.h -- or you can use https://arm64.syscall.sh/ for new ones
#
# Maps the name of each intercepted system call to its AArch64 number.
# gen_source emits one filter check per entry, and gen_header emits one
# BST_NR_<name> define per entry.
declare -A syscalls=(
["mknodat"]="33"
)

# Instructions emitted once at the start of the filter, ahead of the
# per-syscall checks. Each element is the text of one C initializer for a
# struct sock_filter; gen_source prints them in order.
prelude=(
# Check that we're running on AArch64
# (jt=1, jf=0: on a match skip the kill below, otherwise fall through to it)
'BPF_STMT(BPF_LD | BPF_W | BPF_ABS, (offsetof(struct seccomp_data, arch)))'
'BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_AARCH64, 1, 0)'
'BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS)'

# Load syscall number
'BPF_STMT(BPF_LD | BPF_W | BPF_ABS, (offsetof(struct seccomp_data, nr)))'
)

# Two-instruction template emitted once per intercepted system call. The
# elements are single-quoted so that $nr stays literal here and is only
# substituted by the eval in gen_source. If the loaded syscall number equals
# $nr (jt=0) the next instruction returns SECCOMP_RET_USER_NOTIF; otherwise
# (jf=1) that return is skipped and the next check runs.
syscall_jump=(
'BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, $nr, 0, 1)'
'BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_USER_NOTIF)'
)

# NOTE: indentation is done with tabs. Do not use spaces, do not remove tabs,
# lest you break all HEREDOCs.

# Print the generated C source on standard output: a fixed header, the
# syscall_filter[] array (prelude, one syscall_jump group per entry of
# syscalls, and a final SECCOMP_RET_ALLOW), then syscall_filter_length.
gen_source() {
cat <<-EOF
/* THIS FILE WAS GENERATED BY arch/aarch64/gen-syscall.bash -- DO NOT EDIT */

#include <stddef.h>
#include <linux/audit.h>
#include <linux/bpf_common.h>
#include <linux/filter.h>
#include <linux/seccomp.h>

const struct sock_filter syscall_filter[] = {
EOF

# Emit the prelude. eval re-parses each template inside double quotes so that
# any shell expansion it contains is performed; every emitted line starts with
# a tab and ends with a comma.
for stmt in "${prelude[@]}"; do
eval "echo $'\t'\"$stmt\","
done

# Emit one syscall_jump group per intercepted system call, with nr set to the
# number that the template's $nr expands to.
# NOTE(review): the emit order follows bash's iteration order over the
# associative array keys, which this script does not control -- confirm that
# is acceptable if more entries are added.
for syscall in "${!syscalls[@]}"; do
nr=${syscalls[$syscall]}
for stmt in "${syscall_jump[@]}"; do
eval "echo $'\t'\"$stmt\","
done
done

# Anything that matched none of the checks above is allowed.
echo $'\t''BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),'

cat <<-EOF
};

const size_t syscall_filter_length = sizeof (syscall_filter) / sizeof (struct sock_filter);

/* THIS FILE WAS GENERATED BY arch/aarch64/gen-syscall.bash -- DO NOT EDIT */
EOF
}

# Print the generated C header on standard output: extern declarations for
# the filter and its length, one BST_NR_<name> define per entry of syscalls,
# and BST_NR_MAX set to the largest syscall number in the table.
gen_header() {
cat <<-EOF
/* THIS FILE WAS GENERATED BY arch/aarch64/gen-syscall.bash -- DO NOT EDIT */

extern const struct sock_filter syscall_filter[];
extern const size_t syscall_filter_length;

EOF

# Emit the defines and track the largest syscall number in the same pass.
max=0
for syscall in "${!syscalls[@]}"; do
echo "#define BST_NR_${syscall} ${syscalls[$syscall]}"
(( ${syscalls[$syscall]} > max )) && max=${syscalls[$syscall]}
done

cat <<-EOF

#define BST_NR_MAX $max

/* THIS FILE WAS GENERATED BY arch/aarch64/gen-syscall.bash -- DO NOT EDIT */
EOF
}

# Write both generated files. The output paths are relative, so they resolve
# against the directory the script is run from, which must contain
# arch/aarch64/.
gen_source > arch/aarch64/syscall.c
gen_header > arch/aarch64/syscall.h

21 changes: 21 additions & 0 deletions arch/aarch64/syscall.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/* THIS FILE WAS GENERATED BY arch/aarch64/gen-syscall.bash -- DO NOT EDIT */

#include <stddef.h>
#include <linux/audit.h>
#include <linux/bpf_common.h>
#include <linux/filter.h>
#include <linux/seccomp.h>

const struct sock_filter syscall_filter[] = {
BPF_STMT(BPF_LD | BPF_W | BPF_ABS, (offsetof(struct seccomp_data, arch))),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_AARCH64, 1, 0),
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
BPF_STMT(BPF_LD | BPF_W | BPF_ABS, (offsetof(struct seccomp_data, nr))),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 33, 0, 1),
BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_USER_NOTIF),
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
};

const size_t syscall_filter_length = sizeof (syscall_filter) / sizeof (struct sock_filter);

/* THIS FILE WAS GENERATED BY arch/aarch64/gen-syscall.bash -- DO NOT EDIT */
10 changes: 10 additions & 0 deletions arch/aarch64/syscall.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/* THIS FILE WAS GENERATED BY arch/aarch64/gen-syscall.bash -- DO NOT EDIT */

extern const struct sock_filter syscall_filter[];
extern const size_t syscall_filter_length;

#define BST_NR_mknodat 33

#define BST_NR_MAX 33

/* THIS FILE WAS GENERATED BY arch/aarch64/gen-syscall.bash -- DO NOT EDIT */
130 changes: 130 additions & 0 deletions arch/x86/gen-syscall.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
#!/bin/bash
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure I love bash as the tool here, but hey, I guess it's better than pulling a templating engine into the bst build!


# This script generates the classic BPF program to intercept system calls
# in x86 userspace.

# From asm/unistd_64.h
#
# Maps the name of each intercepted system call to its x86_64 number.
# gen_source emits one filter check per entry in the x86_64 table, and
# gen_header emits one BST_NR_<name> define per entry.
declare -A x86_64_syscalls=(
["mknod"]="133"
["mknodat"]="259"
)

# From asm/unistd_32.h
#
# Maps the name of each intercepted system call to its i386 number.
# gen_source emits one filter check per entry in the i386 table, and
# gen_header emits one BST_NR_<name>_32 define per entry.
declare -A i386_syscalls=(
["mknod"]="14"
["mknodat"]="297"
)

# Instructions emitted once at the start of the filter, ahead of the x86_64
# per-syscall checks. Each element is the text of one C initializer for a
# struct sock_filter. The elements are single-quoted so that the arithmetic
# on i386_offset is only evaluated by the eval in gen_source, after
# i386_offset has been computed further down.
prelude=(
# Check that we're running on x86_64 or i386
# On i386, jump forward to the i386 table: this jump sits at index 1 of the
# filter and BPF offsets are relative to the following instruction, hence
# i386_offset - 2. On x86_64, skip the kill below; anything else is killed.
'BPF_STMT(BPF_LD | BPF_W | BPF_ABS, (offsetof(struct seccomp_data, arch)))'
'BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_I386, $(($i386_offset-2)), 0)'
'BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 1, 0)'
'BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS)'

# The x32 ABI (not to be confused with the i386 ABI!) uses the
# same system call numbers as x86_64, but sets bit 30. Clear it so we share
# the same table.
# (If the loaded number is >= X32_SYSCALL_BIT the subtraction runs; otherwise
# it is skipped.)
'BPF_STMT(BPF_LD | BPF_W | BPF_ABS, (offsetof(struct seccomp_data, nr)))'
'BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, X32_SYSCALL_BIT, 0, 1)'
'BPF_STMT(BPF_ALU | BPF_SUB | BPF_K, X32_SYSCALL_BIT)'
)

# Two-instruction template emitted once per intercepted system call, for both
# the x86_64 and the i386 table. The elements are single-quoted so that $nr
# stays literal here and is only substituted by the eval in gen_source. If the
# loaded syscall number equals $nr (jt=0) the next instruction returns
# SECCOMP_RET_USER_NOTIF; otherwise (jf=1) that return is skipped and the next
# check runs.
syscall_jump=(
'BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, $nr, 0, 1)'
'BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_USER_NOTIF)'
)

# Index, within the generated filter, of the first instruction of the i386
# table: the prelude comes first, then one syscall_jump group per x86_64
# system call, then the single SECCOMP_RET_ALLOW that ends the x86_64 table
# (the trailing + 1).
i386_offset=$((${#prelude[@]} + ${#syscall_jump[@]}*${#x86_64_syscalls[@]} + 1))

# NOTE: indentation is done with tabs. Do not use spaces, do not remove tabs,
# lest you break all HEREDOCs.

# Print the generated C source on standard output: a fixed header, the
# syscall_filter[] array, then syscall_filter_length. The array is laid out as
# the prelude, the x86_64 table ending in SECCOMP_RET_ALLOW, then the i386
# table (which reloads the syscall number) ending in SECCOMP_RET_ALLOW.
# The i386 jump in the prelude relies on this exact layout; see i386_offset.
gen_source() {
cat <<-EOF
/* THIS FILE WAS GENERATED BY arch/x86/gen-syscall.bash -- DO NOT EDIT */

#include <stddef.h>
#include <linux/audit.h>
#include <linux/bpf_common.h>
#include <linux/filter.h>
#include <linux/seccomp.h>

/* For the x32 ABI, all system call numbers have bit 30 set */
#define X32_SYSCALL_BIT 0x40000000

const struct sock_filter syscall_filter[] = {
EOF

# Emit the prelude. eval re-parses each template inside double quotes so that
# the shell expansions it contains (here the i386_offset arithmetic) are
# performed; every emitted line starts with a tab and ends with a comma.
for stmt in "${prelude[@]}"; do
eval "echo $'\t'\"$stmt\","
done

# x86_64 table: one syscall_jump group per entry, with nr set to the number
# that the template's $nr expands to.
# NOTE(review): the emit order follows bash's iteration order over the
# associative array keys, which this script does not control -- confirm that
# is acceptable for the checked-in output.
for syscall in "${!x86_64_syscalls[@]}"; do
nr=${x86_64_syscalls[$syscall]}
for stmt in "${syscall_jump[@]}"; do
eval "echo $'\t'\"$stmt\","
done
done

# End of the x86_64 table: allow everything that did not match. The next
# instruction is the i386 entry point, which loads the syscall number itself
# because the prelude's i386 jump bypasses the earlier load.
echo $'\t''BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),'
echo $'\t''BPF_STMT(BPF_LD | BPF_W | BPF_ABS, (offsetof(struct seccomp_data, nr))),'

# i386 table: same template, i386 numbers.
for syscall in "${!i386_syscalls[@]}"; do
nr=${i386_syscalls[$syscall]}
for stmt in "${syscall_jump[@]}"; do
eval "echo $'\t'\"$stmt\","
done
done

# End of the i386 table: allow everything that did not match.
echo $'\t''BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),'

cat <<-EOF
};

const size_t syscall_filter_length = sizeof (syscall_filter) / sizeof (struct sock_filter);

/* THIS FILE WAS GENERATED BY arch/x86/gen-syscall.bash -- DO NOT EDIT */
EOF
}

# Print the generated C header on standard output: extern declarations for
# the filter and its length, BST_NR_<name> defines for the x86_64 numbers,
# BST_NR_<name>_32 defines for the i386 numbers, BST_SECCOMP_32, and the
# largest number of each table as BST_NR_MAX and BST_NR_MAX32.
gen_header() {
cat <<-EOF
/* THIS FILE WAS GENERATED BY arch/x86/gen-syscall.bash -- DO NOT EDIT */

extern const struct sock_filter syscall_filter[];
extern const size_t syscall_filter_length;

EOF

# x86_64 numbers.
for syscall in "${!x86_64_syscalls[@]}"; do
echo "#define BST_NR_${syscall} ${x86_64_syscalls[$syscall]}"
done

# i386 numbers, distinguished by the _32 suffix.
for syscall in "${!i386_syscalls[@]}"; do
echo "#define BST_NR_${syscall}_32 ${i386_syscalls[$syscall]}"
done

# Largest x86_64 syscall number in the table.
max=0
for syscall in "${!x86_64_syscalls[@]}"; do
(( ${x86_64_syscalls[$syscall]} > max )) && max=${x86_64_syscalls[$syscall]}
done

# Largest i386 syscall number in the table.
max32=0
for syscall in "${!i386_syscalls[@]}"; do
(( ${i386_syscalls[$syscall]} > max32 )) && max32=${i386_syscalls[$syscall]}
done

cat <<-EOF

#define BST_SECCOMP_32 1

#define BST_NR_MAX $max
#define BST_NR_MAX32 $max32

/* THIS FILE WAS GENERATED BY arch/x86/gen-syscall.bash -- DO NOT EDIT */
EOF
}

# Write both generated files. The output paths are relative, so they resolve
# against the directory the script is run from, which must contain arch/x86/.
gen_source > arch/x86/syscall.c
gen_header > arch/x86/syscall.h
35 changes: 35 additions & 0 deletions arch/x86/syscall.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/* THIS FILE WAS GENERATED BY arch/x86/gen-syscall.bash -- DO NOT EDIT */
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we avoid checking this in, given we generate it, and you've gone to the trouble of not needing anything other than bash to do so?


#include <stddef.h>
#include <linux/audit.h>
#include <linux/bpf_common.h>
#include <linux/filter.h>
#include <linux/seccomp.h>

/* For the x32 ABI, all system call numbers have bit 30 set */
#define X32_SYSCALL_BIT 0x40000000

const struct sock_filter syscall_filter[] = {
BPF_STMT(BPF_LD | BPF_W | BPF_ABS, (offsetof(struct seccomp_data, arch))),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_I386, 10, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 1, 0),
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
BPF_STMT(BPF_LD | BPF_W | BPF_ABS, (offsetof(struct seccomp_data, nr))),
BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, X32_SYSCALL_BIT, 0, 1),
BPF_STMT(BPF_ALU | BPF_SUB | BPF_K, X32_SYSCALL_BIT),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 133, 0, 1),
BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_USER_NOTIF),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 259, 0, 1),
BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_USER_NOTIF),
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
BPF_STMT(BPF_LD | BPF_W | BPF_ABS, (offsetof(struct seccomp_data, nr))),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 14, 0, 1),
BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_USER_NOTIF),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 297, 0, 1),
BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_USER_NOTIF),
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
};

const size_t syscall_filter_length = sizeof (syscall_filter) / sizeof (struct sock_filter);

/* THIS FILE WAS GENERATED BY arch/x86/gen-syscall.bash -- DO NOT EDIT */
16 changes: 16 additions & 0 deletions arch/x86/syscall.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/* THIS FILE WAS GENERATED BY arch/x86/gen-syscall.bash -- DO NOT EDIT */

extern const struct sock_filter syscall_filter[];
extern const size_t syscall_filter_length;

#define BST_NR_mknod 133
#define BST_NR_mknodat 259
#define BST_NR_mknod_32 14
#define BST_NR_mknodat_32 297

#define BST_SECCOMP_32 1

#define BST_NR_MAX 259
#define BST_NR_MAX32 297

/* THIS FILE WAS GENERATED BY arch/x86/gen-syscall.bash -- DO NOT EDIT */
1 change: 1 addition & 0 deletions arch/x86_64
1 change: 1 addition & 0 deletions capable.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
# define BST_CAP_SETUID ((uint64_t) 1 << CAP_SETUID)
# define BST_CAP_SETGID ((uint64_t) 1 << CAP_SETGID)
# define BST_CAP_SYS_CHROOT ((uint64_t) 1 << CAP_SYS_CHROOT)
# define BST_CAP_MKNOD ((uint64_t) 1 << CAP_MKNOD)

extern int deny_new_capabilities;

Expand Down
7 changes: 6 additions & 1 deletion config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,13 @@
# define LIBEXECDIR "@libexecdir@"
# define VERSION "@version@"

#mesondefine ARCH
#mesondefine ARCH_X86
#mesondefine ARCH_X86_64

#mesondefine HAVE_SECCOMP_UNOTIFY
#mesondefine HAVE_SYSTEMD
#mesondefine HAVE_SYS_mount_setattr
#mesondefine HAVE_close_range
#mesondefine HAVE_SYSTEMD

#endif /* !CONFIG_H_ */
14 changes: 14 additions & 0 deletions enter.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@
#include "bst_limits.h"
#include "capable.h"
#include "compat.h"
#include "config.h"
#include "enter.h"
#include "errutil.h"
#include "fd.h"
#include "mount.h"
#include "net.h"
#include "ns.h"
Expand All @@ -40,6 +42,10 @@
#include "util.h"
#include "fd.h"

#ifdef HAVE_SECCOMP_UNOTIFY
# include "sec.h"
#endif

static inline size_t append_argv(char **argv, size_t argc, char *arg)
{
if (argc >= ARG_MAX) {
Expand Down Expand Up @@ -456,6 +462,14 @@ int enter(struct entry_settings *opts)
}
ns_enter_postfork(namespaces, ns_len);

#ifdef HAVE_SECCOMP_UNOTIFY
int seccomp_fd = sec_seccomp_install_filter();
if (seccomp_fd != -1) {
send_fd(outer_helper.fd, seccomp_fd);
close(seccomp_fd);
}
#endif

outer_helper_close(&outer_helper);

int rtnl = init_rtnetlink_socket();
Expand Down
Loading
Loading