Skip to content
Snippets Groups Projects

Drop the LXC OSTree containers

Merged Emanuele Aina requested to merge wip/em/drop-lxc-ostree-containers into apertis/v2019dev0
8 files
+ 3
599
Compare changes
  • Side-by-side
  • Inline
Files
8
+ 0
465
#!/bin/sh
# Template for Apertis OStree-based container images.
#
# Copyright © 2017 Collabora Ltd.
set -u
# Command-line normalisation and defaults.
#
# Apertis options:
#   -o, --ospack URL   URL of the rootfs tarball to download (mandatory)
#   -f, --force        re-download the tarball even if a local copy exists
LONGOPTS="ospack:,force"
SHORTOPTS="o:f"
# LXC internal options (passed by lxc-create to every template)
LONGOPTS="$LONGOPTS,name:,path:,rootfs:,mapped-uid:,mapped-gid:"
# Let getopt(1) reorder and quote the arguments. Abort on unknown options or
# missing option arguments instead of carrying on with a half-parsed command
# line (getopt has already printed the reason on stderr).
if ! OPTS=$(getopt -o "$SHORTOPTS" -l "$LONGOPTS" -- "$@"); then
  echo "Failed to parse command line options" >&2
  exit 1
fi
# getopt output is shell-quoted, so it has to be re-read through eval.
eval set -- "$OPTS"
# mandatory option
OSPACK=
# optional flag, 1 when --force was given
FORCED=0
# values handed over by lxc-create
LXC_NAME=
LXC_PATH=
LXC_ROOTFS=
LXC_MAPPED_UID=
LXC_MAPPED_GID=
# Check if all needed binaries are available.
# Every missing tool is reported before giving up, so the user can install
# them all in one go.
BINARIES="wget tar"
rc=0
for f in $BINARIES; do
  # 'command -v' is specified by POSIX, unlike which(1) which may not be
  # installed at all.
  if ! command -v "$f" >/dev/null 2>&1; then
    echo "$f is required" >&2
    rc=1
  fi
done
[ "$rc" -eq 0 ] || exit 1
# Consume the getopt-normalised arguments one option at a time.
# The loop ends on the first word that is not a known option
# (normally the '--' separator emitted by getopt).
while :; do
  case "$1" in
    -o | --ospack)
      OSPACK="$2"
      shift 2
      ;;
    -f | --force)
      FORCED=1
      shift
      ;;
    --name)
      LXC_NAME="$2"
      shift 2
      ;;
    --path)
      LXC_PATH="$2"
      shift 2
      ;;
    --rootfs)
      LXC_ROOTFS="$2"
      shift 2
      ;;
    --mapped-uid)
      LXC_MAPPED_UID="$2"
      shift 2
      ;;
    --mapped-gid)
      LXC_MAPPED_GID="$2"
      shift 2
      ;;
    *)
      break
      ;;
  esac
done
# The ospack URL is the only mandatory argument: refuse to continue without it.
[ -n "$OSPACK" ] || {
  echo "Please provide URL to download initial rootfs with '--ospack' argument."
  exit 1
}
# Local file name of the ospack: the last path component of the URL.
TARBALL="${OSPACK##*/}"
if [ -z "$TARBALL" ]; then
  # Happens when the URL ends with '/': there is no file name to look for.
  echo "Cannot derive a file name from ospack URL '$OSPACK'." >&2
  exit 1
fi
# Download the tarball unless a copy is already present in the current
# directory; --force always refreshes it.
if [ ! -f "$TARBALL" ] || [ "$FORCED" -eq 1 ]; then
  rm -f -- "$TARBALL"
  if ! wget "$OSPACK"; then
    echo "Failed to download '$OSPACK'." >&2
    # Do not leave a partial file behind: the next run would reuse it
    # as if it were a complete download.
    rm -f -- "$TARBALL"
    exit 1
  fi
fi
# Unpack the rootfs, leaving out the device nodes under dev/.
# tar's own output stays muted as before, but a failure is no longer
# completely invisible. It is only a warning so that the template keeps
# its previous tolerance of extraction errors.
if ! tar -x --exclude='dev/*' -f "$TARBALL" -C "$LXC_ROOTFS" >/dev/null 2>&1; then
  echo "Warning: errors while extracting '$TARBALL' into '$LXC_ROOTFS'." >&2
fi
# Names of the configuration keys (without the leading "lxc.") as used by
# LXC 2.1 and later.
lxc_conf_utsname="uts.name"
lxc_conf_pts="pty.max"
lxc_conf_net="net.0"
lxc_conf_net_ipv4="net.0.ipv4.address"
lxc_conf_apparmor="apparmor.profile"

# lxc_uses_legacy_keys VERSION
# Succeed (return 0) when VERSION, a dotted string such as "2.0.10", is older
# than 2.1 and therefore needs the old configuration key names.
# Return 1 for 2.1 and later, and also when VERSION cannot be parsed.
#
# Major and minor are compared as separate numbers: squashing the dots out of
# "2.0.10" would give 2010, which wrongly looks newer than 2.1.0 (210).
lxc_uses_legacy_keys() {
  _lxc_major=${1%%.*}
  case "$1" in
    *.*)
      _lxc_minor=${1#*.}
      _lxc_minor=${_lxc_minor%%.*}
      ;;
    *)
      _lxc_minor=0
      ;;
  esac
  # Drop any non-numeric suffix such as the "~beta1" of "3.0~beta1".
  _lxc_major=${_lxc_major%%[!0-9]*}
  _lxc_minor=${_lxc_minor%%[!0-9]*}
  [ -n "$_lxc_major" ] || return 1
  [ -n "$_lxc_minor" ] || _lxc_minor=0
  if [ "$_lxc_major" -lt 2 ]; then
    return 0
  fi
  if [ "$_lxc_major" -eq 2 ] && [ "$_lxc_minor" -lt 1 ]; then
    return 0
  fi
  return 1
}

# For different versions
# LXC_VER holds the version string exactly as printed by lxc-start.
LXC_VER=$(lxc-start --version 2>/dev/null) || LXC_VER=
if [ -z "$LXC_VER" ]; then
  echo "Warning: unable to detect the LXC version, assuming 2.1 or newer." >&2
fi
if lxc_uses_legacy_keys "$LXC_VER"; then
  # Old syntax
  lxc_conf_utsname="utsname"
  lxc_conf_pts="pts"
  lxc_conf_net="network"
  lxc_conf_net_ipv4="network.ipv4"
  lxc_conf_apparmor="aa_profile"
fi
#################### Config generation #############################
# Append the Apertis-specific settings to the container configuration.
# The file is only ever appended to: lxc-create has already put the
# rootfs entry in it.
cat >> "$LXC_PATH/config" <<CONFIG_EOF
lxc.${lxc_conf_utsname} = ${LXC_NAME}
# Include default LXC configuration
lxc.include = /usr/share/lxc/config/common.conf
lxc.${lxc_conf_pts} = 1024
lxc.mount.auto = proc:mixed sys:ro cgroup:mixed
lxc.mount.entry = tmpfs /dev/shm tmpfs defaults 0 0
lxc.mount.entry = proc dev/.lxc/proc proc create=dir,optional 0 0
lxc.mount.entry = sys dev/.lxc/sys sysfs create=dir,optional 0 0
lxc.mount.entry = /sys/kernel/security sys/kernel/security none bind,optional 0 0
CONFIG_EOF
# Add a static veth interface on lxcbr0, but only when the configuration
# written so far does not declare a network type of its own.
grep -q "^lxc.$lxc_conf_net.type" "$LXC_PATH/config" || cat >> "$LXC_PATH/config" <<NET_EOF
lxc.${lxc_conf_net}.type = veth
lxc.${lxc_conf_net}.name = eth0
lxc.${lxc_conf_net}.link = lxcbr0
lxc.${lxc_conf_net}.flags = up
lxc.${lxc_conf_net_ipv4} = 10.0.3.138/24
NET_EOF
#################### Scripts #######################################
# Register the three hook scripts generated below (pre-mount, mount, stop)
# in the container configuration; each one lives next to the config file.
for hook_name in pre-mount mount stop; do
  printf 'lxc.hook.%s = %s/%s.sh\n' "$hook_name" "$LXC_PATH" "$hook_name"
done >> "$LXC_PATH/config"
# Generate the pre-mount hook. It rearranges the rootfs so that the OSTree
# deployment selected in the boot loader entry becomes the container root.
# The heredoc delimiter is quoted: nothing is expanded here, every variable
# below is evaluated when LXC runs the hook.
cat > "$LXC_PATH"/pre-mount.sh <<'E_O_F'
#!/bin/sh
set -u
# Remove prefix from path to rootfs
LXC_ROOTFS_PATH=${LXC_ROOTFS_PATH#*:}
# TODO: remove this hack by cleaning the bit before 'lxc-destroy' call
# Remove immutable bit from deploy to allow correctly destroy the container with lxc-destroy
deploy_dir="$LXC_ROOTFS_PATH/ostree/deploy/apertis/deploy"
if [ -d "$deploy_dir" ]; then
  chattr -i "$deploy_dir"/* || :
fi
# Read ostree target on current boot
BOOTCFG="$LXC_ROOTFS_PATH/boot/loader/entries/ostree-apertis-0.conf"
OPTIONS=$(grep ^options "$BOOTCFG" | head -n1 | cut -d ' ' -f 2)
ostree=${OPTIONS#ostree=}
# if no ostree target -- boot to the default non-OStree OS
[ -z "$ostree" ] && exit 0
sysroot=$LXC_ROOTFS_PATH
# Adaptation of 'switchroot.sh' from ostree upstream:
# https://github.com/ostreedev/ostree/blob/master/src/switchroot/switchroot.sh
## the ostree boot parameter is available during the init
# ostree=/ostree/boot.1/.../.../0
## bind mount the ostree deployment to prepare it for move
mount --bind "$sysroot$ostree" "$sysroot$ostree"
## bind mount read-only /usr
mount --bind "$sysroot$ostree/usr" "$sysroot$ostree/usr"
mount --bind -o remount,ro "$sysroot$ostree/usr" "$sysroot$ostree/usr"
## bind mount the physical root
mount --bind "$sysroot" "$sysroot$ostree/sysroot"
## bind mount the var directory which is preserved between deployments
mount --bind "$sysroot/ostree/deploy/apertis/var" "$sysroot$ostree/var"
## make sure target directories are present within var
# Full paths are used so that nothing gets created in the wrong place
# if the var directory is missing.
for d in roothome mnt opt home; do
  mkdir -p "$sysroot$ostree/var/$d"
done
# make happy 'ostree' tool
mount --bind "$sysroot/boot" "$sysroot$ostree/boot"
## move the deployment to the sysroot
mount --move "$sysroot$ostree" "$sysroot"
## after these the init system should start the switch root process
# Do not need to switch root process for LXC hook!
## Hack a kernel command line for ostree in container
# printf instead of 'echo -e': with dash as /bin/sh the "-e" is not an
# option and would end up in the file.
printf 'ostree=%s\n' "$ostree" > "$LXC_ROOTFS_PATH/cmdline"
# Create namespace for container
mkdir -p "/sys/kernel/security/apparmor/policy/namespaces/lxc-$LXC_NAME"
E_O_F
# Generate the mount hook. It overlays the container's /proc/cmdline with
# the 'cmdline' file found in the rootfs directory.
# The heredoc delimiter is quoted, so the variables are evaluated when LXC
# runs the hook, not now. Paths are quoted to survive spaces in the LXC path.
cat > "$LXC_PATH"/mount.sh <<'E_O_F'
#!/bin/sh
set -eu
# Remove prefix from path to rootfs
LXC_ROOTFS_PATH=${LXC_ROOTFS_PATH#*:}
# Substitute command line in container allowing to detect OS properly
mount --bind "$LXC_ROOTFS_PATH/cmdline" "$LXC_ROOTFS_MOUNT/proc/cmdline"
E_O_F
# Generate the stop hook. It clears the immutable attribute on the OSTree
# deployments and always exits successfully.
# The heredoc delimiter is quoted, so the variables are evaluated when LXC
# runs the hook, not now.
cat > "$LXC_PATH"/stop.sh <<'E_O_F'
#!/bin/sh
set -u
# Remove prefix from path to rootfs
LXC_ROOTFS_PATH=${LXC_ROOTFS_PATH#*:}
# TODO: remove this hack by cleaning the bit before 'lxc-destroy' call
# Remove immutable bit from deploy to allow correctly destroy the container with lxc-destroy
deploy_dir="$LXC_ROOTFS_PATH/ostree/deploy/apertis/deploy"
if [ -d "$deploy_dir" ]; then
  chattr -i "$deploy_dir"/* || :
fi
E_O_F
# Make every generated hook script executable.
chmod 0755 "$LXC_PATH"/*.sh
#################### AppArmor ######################################
# Point the container at the custom AppArmor profile.
# The AppArmor namespace is the container name prefixed with `lxc-`.
NAMESPACE="lxc-$LXC_NAME"
# Append the profile setting to the container configuration.
printf 'lxc.%s = lxc-container-apertis//&:%s://unconfined\n' \
  "$lxc_conf_apparmor" "$NAMESPACE" >> "$LXC_PATH/config"
# Do not try to re-write apparmor profile
# If the profile file already exists the template is done: exit successfully
# without regenerating or reloading it.
apparmor_profile=/etc/apparmor.d/lxc-container-apertis
[ -f $apparmor_profile ] && exit 0
# Add apparmor profile
# This has been taken from lxc-default-with-nesting
# to which were added rules from lxd
# NOTE: the heredoc delimiter is unquoted, so the shell would expand any
# '$' or backquote in the text below; the profile currently contains none.
cat > "$apparmor_profile" <<EOF
#include <tunables/global>
profile lxc-container-apertis flags=(attach_disconnected,mediate_deleted) {
# AA_PROFILE_BASE (container-base without deny /s/k/security)
network,
capability,
file,
umount,
# dbus, signal, ptrace and unix are only supported by recent apparmor
# versions. Comment them if the apparmor parser doesn't recognize them.
# This also needs additional rules to reach outside of the container via
# DBus, so just let all of DBus within the container.
dbus,
# Allow us to receive signals from anywhere. Note: if per-container profiles
# are supported, for container isolation this should be changed to something
# like:
# signal (receive) peer=unconfined,
# signal (receive) peer=/usr/bin/lxc-start,
signal (receive),
# Allow us to send signals to ourselves
signal peer=@{profile_name},
# Allow other processes to read our /proc entries, futexes, perf tracing and
# kcmp for now (they will need 'read' in the first place). Administrators can
# override with:
# deny ptrace (readby) ...
ptrace (readby),
# Allow other processes to trace us by default (they will need 'trace' in
# the first place). Administrators can override with:
# deny ptrace (tracedby) ...
ptrace (tracedby),
# Allow us to ptrace ourselves
ptrace peer=@{profile_name},
# Allow receive via unix sockets from anywhere. Note: if per-container
# profiles are supported, for container isolation this should be changed to
# something like:
# unix (receive) peer=(label=unconfined),
unix (receive),
# Allow all unix in the container
unix peer=(label=@{profile_name}),
# ignore DENIED message on / remount
deny mount options=(ro, remount) -> /,
deny mount options=(ro, remount, silent) -> /,
# allow tmpfs mounts everywhere
mount fstype=tmpfs,
# allow hugetlbfs mounts everywhere
mount fstype=hugetlbfs,
# allow mqueue mounts everywhere
mount fstype=mqueue,
# allow fuse mounts everywhere
mount fstype=fuse,
mount fstype=fuse.*,
# deny access under /proc/bus to avoid e.g. messing with pci devices directly
deny @{PROC}/bus/** wklx,
# deny writes in /proc/sys/fs but allow binfmt_misc to be mounted
mount fstype=binfmt_misc -> /proc/sys/fs/binfmt_misc/,
deny @{PROC}/sys/fs/** wklx,
# allow efivars to be mounted, writing to it will be blocked though
mount fstype=efivarfs -> /sys/firmware/efi/efivars/,
# block some other dangerous paths
deny @{PROC}/kcore rwklx,
deny @{PROC}/kmem rwklx,
deny @{PROC}/mem rwklx,
deny @{PROC}/sysrq-trigger rwklx,
# deny writes in /sys except for /sys/fs/cgroup, also allow
# fusectl, securityfs and debugfs to be mounted there (read-only)
mount fstype=fusectl -> /sys/fs/fuse/connections/,
mount fstype=securityfs -> /sys/kernel/security/,
mount fstype=debugfs -> /sys/kernel/debug/,
deny mount fstype=debugfs -> /var/lib/ureadahead/debugfs/,
mount fstype=proc -> /proc/,
mount fstype=sysfs -> /sys/,
mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/,
deny /sys/firmware/efi/efivars/** rwklx,
audit /sys/kernel/security/apparmor/** rwklix,
# Apertis end
mount options=(move) /sys/fs/cgroup/cgmanager/ -> /sys/fs/cgroup/cgmanager.lower/,
mount options=(ro, nosuid, nodev, noexec, remount, strictatime) -> /sys/fs/cgroup/,
# deny reads from debugfs
# deny /sys/kernel/debug/{,**} rwklx,
# allow paths to be made slave, shared, private or unbindable
# FIXME: This currently doesn't work due to the apparmor parser treating those as allowing all mounts.
# mount options=(rw,make-slave) -> **,
# mount options=(rw,make-rslave) -> **,
# mount options=(rw,make-shared) -> **,
# mount options=(rw,make-rshared) -> **,
# mount options=(rw,make-private) -> **,
# mount options=(rw,make-rprivate) -> **,
# mount options=(rw,make-unbindable) -> **,
# mount options=(rw,make-runbindable) -> **,
# allow bind-mounts of anything except /proc, /sys and /dev
mount options=(rw,bind) /[^spd]*{,/**},
mount options=(rw,bind) /d[^e]*{,/**},
mount options=(rw,bind) /de[^v]*{,/**},
mount options=(rw,bind) /dev/.[^l]*{,/**},
mount options=(rw,bind) /dev/.l[^x]*{,/**},
mount options=(rw,bind) /dev/.lx[^c]*{,/**},
mount options=(rw,bind) /dev/.lxc?*{,/**},
mount options=(rw,bind) /dev/[^.]*{,/**},
mount options=(rw,bind) /dev?*{,/**},
mount options=(rw,bind) /p[^r]*{,/**},
mount options=(rw,bind) /pr[^o]*{,/**},
mount options=(rw,bind) /pro[^c]*{,/**},
mount options=(rw,bind) /proc?*{,/**},
mount options=(rw,bind) /s[^y]*{,/**},
mount options=(rw,bind) /sy[^s]*{,/**},
mount options=(rw,bind) /sys?*{,/**},
# allow moving mounts except for /proc, /sys and /dev
mount options=(rw,move) /[^spd]*{,/**},
mount options=(rw,move) /d[^e]*{,/**},
mount options=(rw,move) /de[^v]*{,/**},
mount options=(rw,move) /dev/.[^l]*{,/**},
mount options=(rw,move) /dev/.l[^x]*{,/**},
mount options=(rw,move) /dev/.lx[^c]*{,/**},
mount options=(rw,move) /dev/.lxc?*{,/**},
mount options=(rw,move) /dev/[^.]*{,/**},
mount options=(rw,move) /dev?*{,/**},
mount options=(rw,move) /p[^r]*{,/**},
mount options=(rw,move) /pr[^o]*{,/**},
mount options=(rw,move) /pro[^c]*{,/**},
mount options=(rw,move) /proc?*{,/**},
mount options=(rw,move) /s[^y]*{,/**},
mount options=(rw,move) /sy[^s]*{,/**},
mount options=(rw,move) /sys?*{,/**},
# generated by: lxc-generate-aa-rules.py container-rules.base
deny /proc/sys/[^kn]*{,/**} wklx,
deny /proc/sys/k[^e]*{,/**} wklx,
deny /proc/sys/ke[^r]*{,/**} wklx,
deny /proc/sys/ker[^n]*{,/**} wklx,
deny /proc/sys/kern[^e]*{,/**} wklx,
deny /proc/sys/kerne[^l]*{,/**} wklx,
deny /proc/sys/kernel/[^smhd]*{,/**} wklx,
deny /proc/sys/kernel/d[^o]*{,/**} wklx,
deny /proc/sys/kernel/do[^m]*{,/**} wklx,
deny /proc/sys/kernel/dom[^a]*{,/**} wklx,
deny /proc/sys/kernel/doma[^i]*{,/**} wklx,
deny /proc/sys/kernel/domai[^n]*{,/**} wklx,
deny /proc/sys/kernel/domain[^n]*{,/**} wklx,
deny /proc/sys/kernel/domainn[^a]*{,/**} wklx,
deny /proc/sys/kernel/domainna[^m]*{,/**} wklx,
deny /proc/sys/kernel/domainnam[^e]*{,/**} wklx,
deny /proc/sys/kernel/domainname?*{,/**} wklx,
deny /proc/sys/kernel/h[^o]*{,/**} wklx,
deny /proc/sys/kernel/ho[^s]*{,/**} wklx,
deny /proc/sys/kernel/hos[^t]*{,/**} wklx,
deny /proc/sys/kernel/host[^n]*{,/**} wklx,
deny /proc/sys/kernel/hostn[^a]*{,/**} wklx,
deny /proc/sys/kernel/hostna[^m]*{,/**} wklx,
deny /proc/sys/kernel/hostnam[^e]*{,/**} wklx,
deny /proc/sys/kernel/hostname?*{,/**} wklx,
deny /proc/sys/kernel/m[^s]*{,/**} wklx,
deny /proc/sys/kernel/ms[^g]*{,/**} wklx,
deny /proc/sys/kernel/msg*/** wklx,
deny /proc/sys/kernel/s[^he]*{,/**} wklx,
deny /proc/sys/kernel/se[^m]*{,/**} wklx,
deny /proc/sys/kernel/sem*/** wklx,
deny /proc/sys/kernel/sh[^m]*{,/**} wklx,
deny /proc/sys/kernel/shm*/** wklx,
deny /proc/sys/kernel?*{,/**} wklx,
deny /proc/sys/n[^e]*{,/**} wklx,
deny /proc/sys/ne[^t]*{,/**} wklx,
deny /proc/sys/net?*{,/**} wklx,
# Configuration: apparmor profile loading (in namespace)
# Extracted from lxd/apparmor.go getAAProfileContent
# Block everything in /sys/kernel/security that is not apparmor
audit /sys/kernel/security/apparmor/** rwklix,
audit deny /sys/k[^e]*{,/**} wklx,
audit deny /sys/ke[^r]*{,/**} wklx,
audit deny /sys/ker[^n]*{,/**} wklx,
audit deny /sys/kern[^e]*{,/**} wklx,
audit deny /sys/kerne[^l]*{,/**} wklx,
audit deny /sys/kernel/[^sd]*{,/**} wklx,
audit deny /sys/kernel/s[^e]*{,/**} wklx,
audit deny /sys/kernel/se[^c]*{,/**} wklx,
audit deny /sys/kernel/sec[^u]*{,/**} wklx,
audit deny /sys/kernel/secu[^r]*{,/**} wklx,
audit deny /sys/kernel/secur[^i]*{,/**} wklx,
audit deny /sys/kernel/securi[^t]*{,/**} wklx,
audit deny /sys/kernel/securit[^y]*{,/**} wklx,
audit deny /sys/kernel/security/[^a]*{,/**} wklx,
audit deny /sys/kernel/security/a[^p]*{,/**} wklx,
audit deny /sys/kernel/security/ap[^p]*{,/**} wklx,
audit deny /sys/kernel/security/app[^a]*{,/**} wklx,
audit deny /sys/kernel/security/appa[^r]*{,/**} wklx,
audit deny /sys/kernel/security/appar[^m]*{,/**} wklx,
audit deny /sys/kernel/security/apparm[^o]*{,/**} wklx,
audit deny /sys/kernel/security/apparmo[^r]*{,/**} wklx,
audit deny /sys/kernel/security/apparmor?*{,/**} wklx,
audit deny /sys/kernel/security?*{,/**} wklx,
audit deny /sys/kernel?*{,/**} wklx,
change_profile -> :lxc-apertis-nesting://*,
# AA_PROFILE_NESTING (similar to lxc-default-with-nesting)
deny /dev/.lxc/proc/** rw,
deny /dev/.lxc/sys/** rw,
mount fstype=proc -> /var/cache/lxc/**,
mount fstype=sysfs -> /var/cache/lxc/**,
mount options=(rw,bind),
mount fstype=cgroup -> /sys/fs/cgroup/**,
# AA_PROFILE_UNPRIVILEGED
mount options=(rw,make-slave) -> **,
mount options=(rw,make-rslave) -> **,
mount options=(rw,make-shared) -> **,
mount options=(rw,make-rshared) -> **,
mount options=(rw,make-private) -> **,
mount options=(rw,make-rprivate) -> **,
mount options=(rw,make-unbindable) -> **,
mount options=(rw,make-runbindable) -> **,
mount options=(rw,bind),
mount options=(rw,rbind),
}
EOF
# Load and regenerate apparmor cache
# The exit status of apparmor_parser is not checked: the script carries on
# and still exits 0 below even if loading the profile fails.
apparmor_parser --skip-read-cache --write-cache -r $apparmor_profile
# Regenerate AppArmor cache (T4539)
/lib/apparmor/recache-profiles
# Always report success to the caller at this point.
exit 0
Loading