From 048492d1c837d32c507853f3719888b6b95c0f63 Mon Sep 17 00:00:00 2001 From: Oliver Smith Date: Wed, 7 Dec 2022 12:57:57 +0100 Subject: [PATCH] repo-install-test: run inside qemu + kvm Change repo-install-test to run inside of qemu instead of docker. This job needs to run systemd to verify that the systemd services start up properly. Running systemd inside docker was never officially supported; it worked with cgroups v1 but no longer works with cgroups v2. An alternative approach was running inside podman instead of docker (running systemd inside of podman is officially supported). However, we would have needed various workarounds with podman and wouldn't be able to test all Osmocom systemd services in the end, due to lack of permissions (see review of I394918fc61de36acce65ffb33defcb8fc21801c4). By running with a separate kernel inside qemu, we can run all Osmocom services. Related: OS#5365 Change-Id: Ie7f1bccb05779cb3614016c0b965b810bbb1471b --- .gitignore | 2 +- scripts/repo-install-test.sh | 264 +++++++++++++----- .../{run-inside-docker.sh => run-inside.sh} | 55 ++-- 3 files changed, 235 insertions(+), 86 deletions(-) rename scripts/repo-install-test/{run-inside-docker.sh => run-inside.sh} (91%) diff --git a/.gitignore b/.gitignore index 1009a648..fa28062e 100644 --- a/.gitignore +++ b/.gitignore @@ -12,4 +12,4 @@ _deps/ _release_tarballs/ _docker_playground _repo_install_test_data/ -_repo_install_test_cache/ +scripts/repo-install-test/run-inside-env.sh diff --git a/scripts/repo-install-test.sh b/scripts/repo-install-test.sh index f2fbdc85..300357cf 100755 --- a/scripts/repo-install-test.sh +++ b/scripts/repo-install-test.sh @@ -2,7 +2,7 @@ # Environment variables: # * FEED: binary package feed (e.g.
"latest", "nightly") # * INTERACTIVE: set to 1 to keep an interactive shell open after the script ran (for debugging) -# * KEEP_CACHE: set to 1 to keep downloaded binary packages (for development) +# * KEEP_VM: for development: don't kill/start VM if still running # * PROJ: OBS project namespace (e.g. "osmocom:latest") # * PROJ_CONFLICT: Conflicting OBS project namespace (e.g. "osmocom:nightly") # * TESTS: which tests to run (all by default, see below for possible values) @@ -14,6 +14,12 @@ DISTROS=" debian10 debian11 " +IMG_DIR="/opt/qemu" +TEST_DIR="scripts/repo-install-test" +IMG_PATH="_repo_install_test_data/temp.qcow2" +PID_FILE="_repo_install_test_data/qemu.pid" +PORT_FILE="_repo_install_test_data/qemu.port" +LOG_FILE="_repo_install_test_data/qemu.log" check_usage() { local i @@ -22,17 +28,198 @@ check_usage() { return fi done + set +x + echo echo "usage: repo-install-test.sh DISTRO" echo "DISTRO: one of: $DISTROS" exit 1 } +get_backing_img_path() { + local ret="" + + case "$DISTRO" in + centos8) + ret="$IMG_DIR/alma-8.5.qcow2" + ;; + debian10) + ret="$IMG_DIR/debian-10.qcow2" + ;; + debian11) + ret="$IMG_DIR/debian-11.qcow2" + ;; + *) + set +x + echo "ERROR: script error, missing img path for $DISTRO" >&2 + exit 1 + ;; + esac + + if [ -e "$ret" ]; then + echo "$ret" + else + set +x + echo "ERROR: file not found: $ret" >&2 + echo "ERROR: qemu images not installed via ansible?" 
>&2 + exit 1 + fi +} + +find_free_ssh_port() { + SSH_PORT="$(echo "($PPID % 1000) + 22022" | bc)" + while nc -z 127.0.0.1 "$SSH_PORT"; do + SSH_PORT=$((SSH_PORT + 1)) + done + + echo "$SSH_PORT" > "$PORT_FILE" +} + +prepare_img() { + mkdir -p "$(dirname "$IMG_PATH")" + + qemu-img \ + create \ + -f qcow2 \ + -b "$(get_backing_img_path)" \ + -F qcow2 \ + "$IMG_PATH" +} + +qemu_start() { + if [ -n "$KEEP_VM" ] && [ -e "$PID_FILE" ] && kill -0 "$(cat "$PID_FILE")"; then + SSH_PORT="$(cat "$PORT_FILE")" + return + fi + + prepare_img + find_free_ssh_port + + (timeout 1h qemu-system-x86_64 \ + -cpu host \ + -device "virtio-net-pci,netdev=net" \ + -display none \ + -drive "file=$IMG_PATH,format=qcow2" \ + -enable-kvm \ + -m 1024 \ + -netdev "user,id=net,hostfwd=tcp:127.0.0.1:$SSH_PORT-:22" \ + -nodefaults \ + -pidfile "$PID_FILE" \ + -serial stdio \ + -smp 16 >"$LOG_FILE" 2>&1) & +} + +qemu_ssh() { + timeout "${TIMEOUT:-1m}" \ + sshpass -p root \ + ssh \ + -p "$SSH_PORT" \ + -o StrictHostKeyChecking=no \ + -o UserKnownHostsFile=/dev/null \ + root@127.0.0.1 \ + -- \ + "$@" +} + +qemu_scp() { + timeout "${TIMEOUT:-1m}" \ + sshpass -p root \ + scp \ + -P "$SSH_PORT" \ + -o StrictHostKeyChecking=no \ + -o UserKnownHostsFile=/dev/null \ + "$@" +} + +qemu_run_test_script() { + cat <<- EOF > "$TEST_DIR/run-inside-env.sh" + #!/bin/sh -ex + + export DISTRO="$DISTRO" + export FEED="$FEED" + export PROJ="$PROJ" + export PROJ_CONFLICT="$PROJ_CONFLICT" + export TESTS="$TESTS" + + /repo-install-test/run-inside.sh + EOF + + qemu_ssh rm -rf /repo-install-test/ + qemu_scp -r "$TEST_DIR" "root@127.0.0.1:/repo-install-test" + + TIMEOUT="1h" qemu_ssh sh -ex /repo-install-test/run-inside-env.sh +} + +qemu_print_log() { + echo + echo "Contents of $LOG_FILE:" + echo + cat "$LOG_FILE" +} + +qemu_ssh_wait() { + set +x + echo + echo "Waiting for VM to boot up..." 
+ echo + set -x + + # PID file does not get created immediately + sleep 1 + local pid="$(cat "$PID_FILE")" + + for i in $(seq 1 6); do + if [ -z "$pid" ] || ! kill -0 "$pid"; then + set +x + echo "ERROR: qemu failed, pid: $pid" + qemu_print_log + exit 1 + fi + + if TIMEOUT=10s qemu_ssh true; then + return + fi + + sleep 1 + done + + set +x + echo "ERROR: timeout, VM did not boot up. Log file contents:" + qemu_print_log + exit 1 +} + +clean_up() { + if [ -n "$KEEP_VM" ]; then + return + fi + + if [ -e "$PID_FILE" ]; then + kill $(cat "$PID_FILE") || true + fi + + rm -f "$IMG_PATH" +} + +clean_up_trap() { + if [ -n "$INTERACTIVE" ]; then + TIMEOUT="1h" qemu_ssh bash -i + fi + + set +x + echo + echo "### Clean up ###" + echo + set -x + + trap - EXIT INT TERM 0 + + clean_up +} + check_usage -docker_images_require "$DISTRO-repo-install-test" FEED="${FEED:-nightly}" PROJ="${PROJ:-osmocom:$FEED}" -CONTAINER="$DISTRO-repo-install-test-$FEED" if [ -z "$TESTS" ]; then TESTS=" @@ -57,69 +244,18 @@ if [ -z "$PROJ_CONFLICT" ]; then esac fi -# Try to run "systemctl status" 10 times, kill the container on failure -check_if_systemd_is_running() { - for i in $(seq 1 10); do - sleep 1 - if docker exec "$CONTAINER" systemctl status; then - return - fi - done - echo "ERROR: systemd is not running properly." - docker container kill "$CONTAINER" - exit 1 -} +clean_up +trap clean_up_trap EXIT INT TERM 0 -# Kill already running container -if [ "$(docker inspect -f '{{.State.Running}}' "$CONTAINER" 2> /dev/null)" = "true" ]; then - docker container kill "$CONTAINER" - sleep 1 -fi +qemu_start +qemu_ssh_wait -# Additional docker run arguments -args="" -if [ -n "$KEEP_CACHE" ]; then - args="$args -e KEEP_CACHE=1" - args="$args -v $OSMO_CI_DIR/_repo_install_test_cache/debian/apt:/var/cache/apt" - args="$args -v $OSMO_CI_DIR/_repo_install_test_cache/centos/dnf:/var/cache/dnf" -fi -# Run the container -# * This does not output anything, for debugging add -it and remove &. 
-# * /run, /tmp, cgroups, SYS_ADMIN: needed for systemd -# * SYS_NICE: needed for changing CPUScheduling{Policy,Priority} (osmo-bts systemd service files) -docker run --rm \ - -v "$OSMO_CI_DIR/scripts/repo-install-test:/repo-install-test:ro" \ - --name "$CONTAINER" \ - -e FEED="$FEED" \ - -e PROJ="$PROJ" \ - -e PROJ_CONFLICT="$PROJ_CONFLICT" \ - -e DISTRO="$DISTRO" \ - -e TESTS="$TESTS" \ - -e container=docker \ - --tmpfs /run \ - --tmpfs /run/lock \ - --tmpfs /tmp \ - -v /sys/fs/cgroup:/sys/fs/cgroup:ro \ - --cap-add SYS_ADMIN \ - --cap-add SYS_NICE \ - $args \ - "$USER/$DISTRO-repo-install-test" \ - /lib/systemd/systemd & -check_if_systemd_is_running +set +x +echo +echo "VM is running!" +echo +set -x -# Run the test script -ret=0 -if ! docker exec "$CONTAINER" /repo-install-test/run-inside-docker.sh; then - ret=1 -fi - -# Interactive shell -if [ -n "$INTERACTIVE" ]; then - docker exec -it "$CONTAINER" bash || true -fi - -docker container kill "$CONTAINER" - -exit $ret +qemu_run_test_script diff --git a/scripts/repo-install-test/run-inside-docker.sh b/scripts/repo-install-test/run-inside.sh similarity index 91% rename from scripts/repo-install-test/run-inside-docker.sh rename to scripts/repo-install-test/run-inside.sh index ce71bb8d..5f01524a 100755 --- a/scripts/repo-install-test/run-inside-docker.sh +++ b/scripts/repo-install-test/run-inside.sh @@ -105,10 +105,9 @@ configure_osmocom_repo_debian() { # Add repository key if ! [ -e "$release_key" ]; then - apt-get update - apt install -y wget wget -O /tmp/Release.key "https://obs.osmocom.org/projects/$proj/public_key" fi + apt-key add /tmp/Release.key echo "deb http://$obs_repo ./" > "/etc/apt/sources.list.d/$proj.list" @@ -158,30 +157,44 @@ configure_osmocom_repo() { esac } -configure_keep_cache_debian() { - rm /etc/apt/apt.conf.d/docker-clean +prepare_vm_debian() { + # fmtutil fails in tex-common postinst script. 
This gets installed as + # dependency of osmo-gsm-manuals-dev, but is completely unrelated to + # what we want to test here so just stub it out. + ln -sf /bin/true /usr/bin/fmtutil + echo "path-exclude=/usr/bin/fmtutil" >> /etc/dpkg/dpkg.cfg.d/stub - # "apt" will actually remove the cache by default, even if "apt-get" keeps it. - # https://unix.stackexchange.com/a/447607 - echo "Binary::apt::APT::Keep-Downloaded-Packages "true";" \ - > /etc/apt/apt.conf.d/01keep-debs -} - -configure_keep_cache_centos() { - echo "keepcache=1" >> /etc/dnf/dnf.conf -} - -configure_keep_cache() { - if [ -z "$KEEP_CACHE" ]; then - return - fi + apt-get update --allow-releaseinfo-change + apt-get install -y --no-install-recommends \ + aptitude \ + ca-certificates \ + gnupg2 \ + wget + case "$DISTRO" in + debian10) + # Can't access https://osmocom.org otherwise + apt-get install -y --no-install-recommends \ + libgnutls30 + ;; + esac +} + +prepare_vm_centos() { + # Install dnf-utils for repoquery + dnf install -y dnf-utils + + # Make additional development libraries available + yum config-manager --set-enabled powertools +} + +prepare_vm() { case "$DISTRO" in debian*) - configure_keep_cache_debian + prepare_vm_debian ;; centos*) - configure_keep_cache_centos + prepare_vm_centos ;; esac } @@ -402,7 +415,7 @@ services_check() { } check_env -configure_keep_cache +prepare_vm configure_osmocom_repo "$PROJ" for test in $TESTS; do