Add Ruby wrapper around libseccomp.

Add a Ruby wrapper library around the libseccomp library. Seccomp is
used to restrict the syscalls a program can make. See comments in
app/logical/seccomp.rb for further details.

This is not used for anything yet. It's simply adding part of the
sandboxing infrastructure for later use.
This commit is contained in:
evazion
2021-11-11 08:52:50 -06:00
parent 3f9a85a828
commit 908df7921f
4 changed files with 536 additions and 0 deletions

View File

@@ -55,6 +55,7 @@ gem "rack-timeout", require: "rack/timeout/base"
gem "parallel"
gem "pry-byebug"
gem "pry-rails"
gem "ffi"
group :development do
gem 'rubocop', require: false

View File

@@ -508,6 +508,7 @@ DEPENDENCIES
ed25519
factory_bot
ffaker
ffi
flamegraph
google-cloud-bigquery
google-cloud-storage

533
app/logical/seccomp.rb Normal file
View File

@@ -0,0 +1,533 @@
# This is a wrapper around seccomp, a Linux kernel feature used to limit the
# system calls the current process is allowed to make. This is used for
# sandboxing code when processing user-uploaded files.
#
# @example
# # Allow only the read(2), write(2), close(2), and exit_group(2) syscalls to be used
# # for the remainder of the program; kill the process if any other syscalls are called.
# Seccomp.allow!("read write close exit_group")
#
# # Kill the process if sync(2) is called; allow all other syscalls.
# Seccomp.deny!("sync")
#
# # Run exiftool in a sandboxed subprocess, allowing it to only use syscalls
# # from the @exec, @signals, and @tty syscall groups.
# Seccomp.allow!("@exec @signals @tty") do
# exec "exiftool -json image.jpg"
# end
#
# # Run a shell inside a seccomp sandbox.
# Seccomp.allow!("@common") { exec "dash" }
#
# # Print a human-readable dump of the seccomp filter.
# puts Seccomp.allow("@exec @signals @tty").to_pfc
#
# # Show all available syscalls.
# puts Seccomp.syscalls
#
# Documentation:
#
# @see https://en.wikipedia.org/wiki/Seccomp
# @see https://lwn.net/Articles/656307/ A seccomp overview
# @see https://lwn.net/Articles/494252/ A library for seccomp filters
# @see https://www.kernel.org/doc/html/latest/userspace-api/seccomp_filter.html
# @see https://man7.org/linux/man-pages/man2/seccomp.2.html
# @see https://github.com/seccomp/libseccomp
# @see https://blog.cloudflare.com/sandboxing-in-linux-with-zero-lines-of-code/
# @see https://docs.docker.com/engine/security/seccomp/
# @see https://kubernetes.io/docs/tutorials/clusters/seccomp/
# @see https://www.freedesktop.org/software/systemd/man/systemd.exec.html#System%20Call%20Filtering
#
# Related projects:
#
# @see https://github.com/cloudflare/sandbox
# @see https://github.com/david942j/seccomp-tools
# @see https://man.openbsd.org/pledge.2
# @see https://dev.exherbo.org/~alip/sydbox/
#
# Syscall lists:
#
# @see https://github.com/seccomp/libseccomp/blob/main/src/syscalls.csv
# @see https://github.com/systemd/systemd/blob/main/src/shared/seccomp-util.c#L281
# @see https://github.com/torvalds/linux/blob/master/arch/x86/entry/syscalls/syscall_64.tbl
# @see https://marcin.juszkiewicz.com.pl/download/tables/syscalls.html
# @see https://filippo.io/linux-syscall-table/
module Seccomp
# Base error for the Seccomp wrapper. Raised by Seccomp::Filter#apply_to!
# when the sandboxed subprocess is terminated by SIGSYS.
class Error < StandardError
end
# Symbolic groups of syscalls that can be used in filters.
#
# Keys are group names (always prefixed with "@"); values are lists whose
# entries are either plain syscall names or the names of other groups, which
# Seccomp.expand_syscall_names expands recursively.
#
# The table and each of its lists are frozen so that the shared constant
# can't be modified by accident at runtime.
SYSCALL_GROUPS = {
  # A broad set of common syscalls sufficient to run most programs.
  "@common" => %w[
    @exec @exit @fs @memory @network @process-control @process-info @signals
    @stdio @system-info @threads @time @tty
  ],

  # Syscalls needed to cleanly exit a Ruby program.
  "@exit" => %w[
    exit exit_group getpid munmap rt_sigaction timer_delete
  ],

  # Syscalls needed to allocate and manage memory.
  "@memory" => %w[
    brk mmap mmap2 munmap mprotect mremap
  ],

  # Syscalls needed by multi-threaded Ruby programs.
  "@threads" => %w[
    futex getpid mmap ppoll read write sched_yield
  ],

  # Syscalls commonly needed to execute external programs.
  "@exec" => %w[
    @memory
    @stdio
    @fs-read
    @process-info
    @exit
    arch_prctl
    execve execveat
    futex
    set_robust_list
    set_tid_address
    prlimit64
    timer_settime
  ],

  # Syscalls for reading and writing open files.
  "@stdio" => %w[
    close
    dup dup2 dup3
    getdents getdents64
    fadvise64
    fcntl
    fgetxattr
    fstat
    lseek
    pipe pipe2
    read pread64 readv preadv preadv2
    write pwrite64 writev pwritev pwritev2
  ],

  # Syscalls for opening files.
  "@fs-open" => %w[
    open openat openat2
  ],

  # Syscalls that read information from the filesystem.
  "@fs-read" => %w[
    @fs-open
    access faccessat faccessat2
    chdir fchdir
    getcwd
    getxattr lgetxattr fgetxattr
    readlink readlinkat
    stat fstat newfstatat lstat
    statfs fstatfs
  ],

  # Syscalls that modify data on the filesystem.
  "@fs-write" => %w[
    @fs-read
    creat
    fallocate
    link linkat
    mkdir mkdirat
    rename renameat renameat2
    rmdir
    symlink symlinkat
    truncate ftruncate
    umask
    unlink unlinkat
  ],

  # Syscalls that modify metadata on the filesystem.
  "@fs-attr" => %w[
    @fs-write
    chmod fchmod fchmodat
    chown fchown fchownat lchown
    setxattr lsetxattr fsetxattr
    utime utimes utimensat futimesat
  ],

  # Syscalls for reading or writing to the filesystem.
  "@fs" => %w[
    @stdio @fs-attr
  ],

  # Syscalls for waiting on events from multiple file descriptors.
  "@evented-io" => %w[
    epoll_create epoll_create1 epoll_ctl epoll_wait epoll_pwait
    eventfd eventfd2
    poll ppoll
    select pselect6
  ],

  # Syscalls for creating and using sockets.
  "@network" => %w[
    socket socketpair
    accept accept4
    bind
    connect
    listen
    shutdown
    recv recvfrom recvmsg recvmmsg recvmmsg_time64
    send sendto sendmsg sendmmsg
    getpeername
    getsockname
    getsockopt setsockopt
  ],

  # Syscalls that read the identity and attributes of the current process.
  "@process-info" => %w[
    capget
    getpid getppid
    getpgid getpgrp
    getsid gettid
    getuid geteuid getresuid
    getgid getegid getresgid getgroups
    sched_getaffinity
    times
  ],

  # Syscalls for creating, signalling, prioritizing and waiting on processes.
  "@process-control" => %w[
    clone clone3 fork vfork
    getpriority setpriority
    kill tkill tgkill rt_sigqueueinfo rt_tgsigqueueinfo
    nice
    pidfd_open pidfd_send_signal
    prlimit64
    setpgid
    wait4 waitid waitpid
  ],

  # Syscalls for installing signal handlers and manipulating signal masks.
  "@signals" => %w[
    alarm
    rt_sigaction sigaction
    rt_sigpending sigpending
    rt_sigprocmask sigprocmask
    rt_sigsuspend sigsuspend
    rt_sigtimedwait rt_sigtimedwait_time64
    rt_sigreturn
    signalfd signalfd4
    sigaltstack
    signal
    pause
  ],

  # Syscalls that report information about the system.
  "@system-info" => %w[
    sysinfo
    uname
  ],

  # Syscalls for sleeping and reading clocks.
  "@time" => %w[
    nanosleep clock_nanosleep
    clock_getres
    clock_gettime
    gettimeofday
    time
  ],

  # The single syscall listed for terminal control.
  "@tty" => %w[
    ioctl
  ],
}.each_value(&:freeze).freeze
# A lowlevel wrapper around libseccomp using the Ruby FFI. Each
# `attach_function` call below binds one C function from the shared library,
# using the argument and return types listed next to it. The C prototype of
# each function is quoted in the comment above its binding.
#
# https://github.com/ffi/ffi/wiki
# https://github.com/seccomp/libseccomp
module LibSeccomp
extend FFI::Library
ffi_lib "libseccomp"
# CPU architecture tokens. Only the native-architecture token (0) is declared here.
# https://github.com/seccomp/libseccomp/blob/main/include/seccomp.h.in#L121
enum :arch, [:native, 0]
# Filter actions, passed to the library as uint32 values.
# NOTE(review): :kill is given the same value as :kill_process here. Confirm
# this is intentional; libseccomp's own SCMP_ACT_KILL may be defined differently.
# https://github.com/seccomp/libseccomp/blob/main/include/seccomp.h.in#L332
enum FFI::Type::UINT32, :action, [
:kill, 0x80000000,
:kill_process, 0x80000000,
:kill_thread, 0x00000000,
:log, 0x7ffc0000,
:allow, 0x7fff0000,
]
# Filter attributes that can be set with seccomp_attr_set. Only the two
# attributes used by Seccomp::Filter are declared.
# https://github.com/seccomp/libseccomp/blob/main/include/seccomp.h.in#L64
enum :attr, [
:tsync, 4,
:optimize, 8,
]
# The opaque filter context handle is treated as a plain pointer.
typedef :pointer, :scmp_filter_ctx
# seccomp_init - Initialize the seccomp filter state
# https://man7.org/linux/man-pages/man3/seccomp_init.3.html
# scmp_filter_ctx seccomp_init(uint32_t def_action);
attach_function :seccomp_init, [:action], :scmp_filter_ctx
# seccomp_load - Load the current seccomp filter into the kernel
# https://man7.org/linux/man-pages/man3/seccomp_load.3.html
# int seccomp_load(scmp_filter_ctx ctx);
attach_function :seccomp_load, [:scmp_filter_ctx], :int
# seccomp_release - Release the seccomp filter state
# https://man7.org/linux/man-pages/man3/seccomp_release.3.html
# void seccomp_release(scmp_filter_ctx ctx);
attach_function :seccomp_release, [:scmp_filter_ctx], :void
# seccomp_rule_add - Add a seccomp filter rule
# https://man7.org/linux/man-pages/man3/seccomp_rule_add.3.html
# int seccomp_rule_add(scmp_filter_ctx ctx, uint32_t action, int syscall, unsigned int arg_cnt, ...);
# NOTE(review): the C prototype above is variadic, but the binding declares a
# fixed four-argument list. The only caller in this file passes arg_cnt = 0.
# Confirm that FFI's `:varargs` is not required for this to be portable.
attach_function :seccomp_rule_add, [:scmp_filter_ctx, :action, :int, :uint32], :int
# seccomp_syscall_resolve_name - Resolve a syscall name to a number
# https://man7.org/linux/man-pages/man3/seccomp_syscall_resolve_name.3.html
# int seccomp_syscall_resolve_name(const char *name);
attach_function :seccomp_syscall_resolve_name, [:string], :int
# seccomp_syscall_resolve_num_arch - Resolve a syscall number to a name
# https://man7.org/linux/man-pages/man3/seccomp_syscall_resolve_num_arch.3.html
# char* seccomp_syscall_resolve_num_arch(uint32_t arch_token, int num)
# Bound with a :strptr return type; Seccomp.resolve_syscall_number receives a
# (string, pointer) pair from it and frees the pointer with LibC.free.
attach_function :seccomp_syscall_resolve_num_arch, [:arch, :int], :strptr
# seccomp_attr_set - Manage the seccomp filter attributes
# https://man7.org/linux/man-pages/man3/seccomp_attr_set.3.html
# int seccomp_attr_set(scmp_filter_ctx ctx, enum scmp_filter_attr attr, uint32_t value);
attach_function :seccomp_attr_set, [:scmp_filter_ctx, :attr, :uint32], :int
# seccomp_export_bpf - Export the seccomp filter as BPF
# int seccomp_export_bpf(const scmp_filter_ctx ctx, int fd);
# https://man7.org/linux/man-pages/man3/seccomp_export_bpf.3.html
attach_function :seccomp_export_bpf, [:scmp_filter_ctx, :int], :int
# seccomp_export_pfc - Export the seccomp filter as PFC
# int seccomp_export_pfc(const scmp_filter_ctx ctx, int fd);
# https://man7.org/linux/man-pages/man3/seccomp_export_pfc.3.html
attach_function :seccomp_export_pfc, [:scmp_filter_ctx, :int], :int
end
# A minimal FFI binding to the C standard library. Only `free` is attached;
# Seccomp.resolve_syscall_number uses it to release the pointer returned by
# LibSeccomp.seccomp_syscall_resolve_num_arch.
module LibC
extend FFI::Library
ffi_lib FFI::Library::LIBC
# void free(void *ptr);
attach_function :free, [:pointer], :void
end
# A Seccomp::Filter represents a single seccomp filter, containing a set of
# syscall filtering rules and a default action. It wraps a libseccomp filter
# context, which is released through an FFI::AutoPointer.
class Filter
  attr_reader :context, :tsync, :optimize

  # Create a new syscall filter. Use `add_rule` to add rules to the filter.
  # Use `apply!` to activate the filter after all rules have been added.
  #
  # If a block is given, run the block with the new filter.
  #
  # @param default_action [Symbol] The default action to take when a syscall
  #   doesn't match a rule.
  # @param tsync [Boolean] True to apply the filter to all threads in the
  #   current process. False to apply it just to the current thread.
  # @param optimize [Boolean] True to generate the filter as a binary tree,
  #   false as a sequential list.
  # @yieldparam filter [Seccomp::Filter] the newly created filter
  # @raise [SystemCallError] If the context or its attributes couldn't be set up
  def initialize(default_action = :kill, tsync: true, optimize: false)
    @context = init!(default_action)
    self.tsync = tsync
    self.optimize = optimize
    yield self if block_given?
  end

  # Add a syscall rule to the filter. If the syscall doesn't exist, raise an error.
  #
  # @param syscall_name [String] The name of the syscall
  # @param action [Symbol] The action to take when the syscall is called (:allow, :log, :kill)
  # @return [self]
  # @raise [SystemCallError] If the rule couldn't be added
  def add_rule(syscall_name, action)
    syscall_number = Seccomp.resolve_syscall_name(syscall_name)
    # The trailing 0 is arg_cnt: the rule matches on the syscall number only.
    ret = LibSeccomp.seccomp_rule_add(context, action, syscall_number, 0)
    raise SystemCallError.new("seccomp_rule_add(#{action}, #{syscall_name}) failed", -ret) if ret.negative?

    self
  end

  # Activate the syscall filter by loading it into the kernel. All code after
  # this point must obey the syscall filter.
  #
  # If a block is given, apply the filter to the given block of code. The
  # block is run in a forked subprocess, which means the filter only applies
  # to the block of code.
  #
  # @return [self]
  # @raise [SystemCallError] If the filter couldn't be loaded
  def apply!(&block)
    return apply_to!(&block) if block_given?

    ret = LibSeccomp.seccomp_load(context)
    raise SystemCallError.new("seccomp_load(#{context}) failed", -ret) if ret.negative?

    self
  end

  # Apply the filter to a block of code in a forked subprocess, then wait for
  # the subprocess to finish.
  #
  # @yieldparam filter [Seccomp::Filter] this filter
  # @return [self]
  # @raise [ArgumentError] If no block is given
  # @raise [Seccomp::Error] If the subprocess was terminated by SIGSYS
  def apply_to!(&block)
    raise ArgumentError, "Seccomp::Filter#apply_to!: block required" unless block_given?

    pid = Process.fork do
      apply!
      yield self
    end

    _, status = Process.wait2(pid)
    if status.signaled? && Signal.signame(status.termsig) == "SYS"
      raise Error, "Subprocess called unauthorized syscall (see dmesg for details)"
    end

    self
  end

  # Return a string representing the filter in BPF (Berkeley Packet Filter) format.
  #
  # @return [String]
  # @raise [SystemCallError] If the export failed
  def to_bpf
    export_filter(:seccomp_export_bpf)
  end

  # Return a string representing the filter in PFC (Pseudo Filter Code) format.
  #
  # @return [String]
  # @raise [SystemCallError] If the export failed
  def to_pfc
    export_filter(:seccomp_export_pfc)
  end

  protected

  # Run one of libseccomp's export functions against a pipe and return
  # everything it wrote.
  #
  # NOTE(review): the export is written to the pipe in full before anything is
  # read back, so an export larger than the pipe's capacity would presumably
  # block. Confirm whether very large filters need a different transport.
  #
  # @param function_name [Symbol] The LibSeccomp export function to call
  # @return [String] The exported filter
  # @raise [SystemCallError] If the export function returned an error
  def export_filter(function_name)
    IO.pipe do |reader, writer|
      ret = LibSeccomp.public_send(function_name, context, writer.fileno)
      raise SystemCallError.new("#{function_name}() failed", -ret) if ret.negative?

      # Close the write end first so that `read` sees EOF.
      writer.close
      reader.read
    end
  end

  # Create a new libseccomp context.
  #
  # @param default_action [Symbol] The default_action for the context
  # @return [FFI::AutoPointer] The libseccomp context, released with
  #   seccomp_release by the AutoPointer
  # @raise [Errno::ENOMEM] If seccomp_init returned a NULL context
  def init!(default_action)
    pointer = LibSeccomp.seccomp_init(default_action)
    # A failed seccomp_init comes back as a NULL FFI pointer object, which is
    # not Ruby's nil, so `nil?` would not detect it.
    raise Errno::ENOMEM, "seccomp_init(#{default_action}) failed" if pointer.null?

    FFI::AutoPointer.new(pointer, LibSeccomp.method(:seccomp_release))
  end

  # Set an attribute on the filter.
  #
  # @see https://man7.org/linux/man-pages/man3/seccomp_attr_set.3.html
  # @param attr [Symbol] the attribute name
  # @param value [Integer] the attribute value
  # @raise [SystemCallError] If the attribute couldn't be set
  def set_attr(attr, value)
    ret = LibSeccomp.seccomp_attr_set(context, attr, value)
    raise SystemCallError.new("seccomp_attr_set(context, #{attr.inspect}, #{value}) failed", -ret) if ret.negative?
  end

  # If true, apply the filter to all threads in the process. If false,
  # apply it only to the current thread.
  #
  # @param value [Boolean]
  # @return [void]
  def tsync=(value)
    set_attr(:tsync, value ? 1 : 0)
    @tsync = value
  end

  # If true, generate the BPF code as a binary tree of if-else statements.
  # May be faster for large rule sets. If false, generate the BPF code as a
  # sequential list of if-else statements.
  #
  # @param value [Boolean]
  # @return [void]
  def optimize=(value)
    # The attribute is set to 2 when enabled and 1 when disabled.
    set_attr(:optimize, value ? 2 : 1)
    @optimize = value
  end
end
# Create a filter allowing only the given set of syscalls. Deny all other
# syscalls by default. Calling a denied syscall will kill the process by default.
#
# Call `apply!` on the result to activate the filter.
#
# @param syscalls [String, Array<String>] Syscall and/or syscall group names
# @param default_action [Symbol] The action taken for any syscall not listed
# @param options [Hash] Extra options passed through to Seccomp.filter
#   (and from there to Seccomp::Filter#initialize), e.g. `tsync:`, `optimize:`
# @return [Seccomp::Filter] the seccomp filter
def self.allow(syscalls, default_action: :kill, **options)
  filter(syscalls, :allow, default_action: default_action, **options)
end
# Create a filter denying the given set of syscalls. Allow all other
# syscalls by default. Calling a denied syscall will kill the process by default.
#
# Call `apply!` on the result to activate the filter.
#
# @param syscalls [String, Array<String>] Syscall and/or syscall group names
# @param default_action [Symbol] The action taken for any syscall not listed
# @param options [Hash] Extra options passed through to Seccomp.filter
#   (and from there to Seccomp::Filter#initialize), e.g. `tsync:`, `optimize:`
# @return [Seccomp::Filter] the seccomp filter
def self.deny(syscalls, default_action: :allow, **options)
  filter(syscalls, :kill, default_action: default_action, **options)
end
# Create and immediately apply a filter allowing only the given set of syscalls.
#
# Without a block the filter is loaded into the current process. With a
# block, the block is handed to Seccomp::Filter#apply!.
#
# @param syscalls [String, Array<String>] Syscall and/or syscall group names
# @param default_action [Symbol] The action taken for any syscall not listed
# @param options [Hash] Extra options passed through to Seccomp.filter
#   (and from there to Seccomp::Filter#initialize), e.g. `tsync:`, `optimize:`
# @return [Seccomp::Filter] the applied filter
def self.allow!(syscalls, default_action: :kill, **options, &block)
  filter(syscalls, :allow, default_action: default_action, **options).apply!(&block)
end
# Create and immediately apply a filter denying the given set of syscalls.
#
# Without a block the filter is loaded into the current process. With a
# block, the block is handed to Seccomp::Filter#apply!.
#
# @param syscalls [String, Array<String>] Syscall and/or syscall group names
# @param default_action [Symbol] The action taken for any syscall not listed
# @param options [Hash] Extra options passed through to Seccomp.filter
#   (and from there to Seccomp::Filter#initialize), e.g. `tsync:`, `optimize:`
# @return [Seccomp::Filter] the applied filter
def self.deny!(syscalls, default_action: :allow, **options, &block)
  filter(syscalls, :kill, default_action: default_action, **options).apply!(&block)
end
# Build a syscall filter that performs `action` for each of the given
# syscalls and `default_action` for every other syscall.
#
# The filter is only constructed here; call `apply!` on the result to
# activate it.
#
# @param syscalls [String, Array<String>] Syscall names and/or group names,
#   expanded with Seccomp.expand_syscall_names
# @param action [Symbol] The action to take when any of the given syscalls are called (:allow, :log, :kill)
# @param default_action [Symbol] The action to take when any other syscall is called (:allow, :log, :kill)
# @param options [Hash] Options to pass to Seccomp::Filter#initialize
# @return [Seccomp::Filter] the seccomp filter
def self.filter(syscalls, action, default_action: :kill, **options)
  Filter.new(default_action, **options) do |new_filter|
    expand_syscall_names(syscalls).each { |name| new_filter.add_rule(name, action) }
  end
end
# Return the syscalls available on the current system, found by resolving
# every syscall number from 0 through 8192 and keeping those with a name.
# The result may vary depending on the CPU architecture and kernel version.
#
# The table is built on first use and memoized.
#
# @return [Hash<Integer, String>] a hash of syscall numbers to syscall names
def self.syscalls
  @syscalls ||= (0..8192).each_with_object({}) do |number, table|
    name = resolve_syscall_number(number)
    table[number] = name unless name.nil?
  end
end
# Flatten a mixture of plain syscall names and syscall group names
# (e.g. `@stdio`) into a sorted list of unique plain syscall names. Groups
# are looked up in SYSCALL_GROUPS and expanded recursively, since a group may
# itself contain other groups.
#
# @param syscall_names [Array<String>] Syscall names and/or group names. Each
#   argument may be a whitespace-separated string or a (nested) list of strings.
# @return [Array<String>] A list of syscall names
# @raise [KeyError] If a name starting with `@` isn't a known group
def self.expand_syscall_names(*syscall_names)
  tokens = syscall_names.flatten.flat_map(&:split)

  expanded = tokens.each_with_object([]) do |token, names|
    if token.start_with?("@")
      names.concat(expand_syscall_names(SYSCALL_GROUPS.fetch(token)))
    else
      names << token
    end
  end

  expanded.uniq.sort
end
# Look up the number of a syscall by name.
#
# The result may be negative if the syscall exists on another architecture,
# but not on this architecture. For example, `arch_prctl` exists on x86 but
# not on ARM or other architectures.
#
# A result of -1 from libseccomp is treated as "no such syscall on any
# architecture" and raised as an error.
#
# @param syscall_name [String, #to_s] the syscall name
# @return [Integer] the syscall number
# @raise [Errno::EINVAL] if the syscall doesn't exist
def self.resolve_syscall_name(syscall_name)
  number = LibSeccomp.seccomp_syscall_resolve_name(syscall_name.to_s)
  return number unless number == -1

  raise Errno::EINVAL, "Syscall '#{syscall_name}' doesn't exist"
end
# Look up the name of a syscall by number.
#
# libseccomp hands back the name together with a pointer to the underlying
# buffer; the buffer is always released with LibC.free before returning, even
# if the lookup raises.
#
# @param syscall_number [Integer] The syscall number
# @param arch [Symbol] The CPU architecture. Only `:native` is declared in
#   LibSeccomp's `:arch` enum.
# @return [String, nil] The syscall name, or nil if a syscall by that number doesn't exist
def self.resolve_syscall_number(syscall_number, arch = :native)
  buffer = nil

  begin
    resolved, buffer = LibSeccomp.seccomp_syscall_resolve_num_arch(arch, syscall_number)
    resolved
  ensure
    LibC.free(buffer)
  end
end
end

View File

@@ -23,6 +23,7 @@ DANBOORU_RUNTIME_DEPS="
ca-certificates mkvtoolnix rclone libpq5
zlib1g libfftw3-3 libwebp6 libwebpmux3 libwebpdemux2 liborc-0.4.0 liblcms2-2
libpng16-16 libjpeg-turbo8 libexpat1 libglib2.0 libgif7 libexif12 libvpx6
libseccomp2
"
COMMON_RUNTIME_DEPS="
$DANBOORU_RUNTIME_DEPS $EXIFTOOL_RUNTIME_DEPS tini busybox less ncdu