From 4c177e45b9aa0c9b26aefaabbc8acac767ac294f Mon Sep 17 00:00:00 2001 From: Ian Kelling Date: Wed, 27 Mar 2024 16:17:30 -0400 Subject: [PATCH] Improvements, including compatibility issue. We now save mount namespace info in a temporary directory. If you upgrade to this version, clean up the old directory with rm -rf /root/mount_namespaces. We add a show command for debugging, and fix up some minor bugs in stop. Adds a check for silent failure when adding the default route, which might be the cause of an intermittent issue I've seen. I will have to wait and see if it gets detected. --- newns | 145 ++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 100 insertions(+), 45 deletions(-) diff --git a/newns b/newns index 315c786..34e4ca5 100755 --- a/newns +++ b/newns @@ -19,18 +19,38 @@ # https://savannah.nongnu.org/projects/bash-bear-trap/ set -e; . /usr/local/lib/bash-bear; set +e +m() { + local out + printf "newns: %s\n" "$*" + if ! out=$("$@" 2>&1); then + echo "newns: WARNING: last command exit code: $?" + elif [[ ! $out ]]; then + echo "newns: WARNING: no output from last command" + fi +} usage() { cat <. EOF @@ -94,7 +116,7 @@ EOF #### begin arg parsing #### create=false -temp=$(getopt -l help,create hcn: "$@") || usage 1 +temp=$(getopt -l help,create hcdn: "$@") || usage 1 eval set -- "$temp" while true; do case $1 in @@ -132,37 +154,39 @@ v0=veth0-$nn v1=veth1-$nn ip_base=10.173 -if ! $create && [[ $(readlink /proc/self/ns/net) == "$(readlink /proc/1/ns/net)" ]]; then - create=true -fi - -# make the default network namespace be named +### begin make the default network namespace be named "default" ### mkdir -p /run/netns target=/run/netns/default if [[ ! -e $target && ! -L $target ]]; then # -f to avoid a race condition with running twice ln -sf /proc/1/ns/net $target fi +### end make the default network namespace be named "default" ### -ipd() { ip -n default "$@"; } - - +if ! 
$create && [[ $(readlink /proc/self/ns/net) == "$(readlink /proc/1/ns/net)" ]]; then + create=true +fi # otherwise we are already in the network namespace and it's unnamed. if $create; then ipnnargs="-n $nn" fi + + +ipd() { ip -n default "$@"; } + # run ip in the network namespace ipnn() { ip $ipnnargs "$@"; } # default network namespace exec dexec() { ip netns exec default "$@"; } # mount namespace exec -mexec() { /usr/bin/nsenter --mount=/root/mount_namespaces/$nn "$@"; } +mexec() { /usr/bin/nsenter --mount=/run/mount-namespaces/$nn "$@"; } nat() { - # note, in a previous commit i specified the output interface with -o, + # Note: duplicated in show() + # Note, in a previous commit i specified the output interface with -o, # but that broke things when my gateway interface changed, and I can't # see any advantage to it, so I removed it. dexec iptables -t nat $1 POSTROUTING -s $network.0/24 -j MASQUERADE \ @@ -177,44 +201,41 @@ diptables-add() { } -find_network() { +find-network() { if [[ $network ]]; then return fi found=false - existing=false ips="$(ipd addr show | awk '$1 == "inet" {print $2}')" for ((i=1; i <= 254; i++)); do network=$ip_base.$i - if printf "%s\n" "$ips" | grep "^${network//./\\.}" >/dev/null; then - existing=true - else + if ! printf "%s\n" "$ips" | grep "^${network//./\\.}" >/dev/null; then found=true break fi done -} - -start() { - find_network if ! $found; then echo "$0: error: no open network found" exit 1 fi +} + +start() { + find-network #### begin mount namespace setup #### - mkdir -p /root/mount_namespaces - if ! mountpoint /root/mount_namespaces >/dev/null; then - mount --bind /root/mount_namespaces /root/mount_namespaces + mkdir -p /run/mount-namespaces + if ! 
mountpoint /run/mount-namespaces >/dev/null; then + mount --bind /run/mount-namespaces /run/mount-namespaces fi # note: This is outside the mount condition because I've mysteriously # had this become shared instead of private, perhaps it # got remounted somehow and lost the setting. - mount --make-private /root/mount_namespaces - if [[ ! -e /root/mount_namespaces/$nn ]]; then - touch /root/mount_namespaces/$nn + mount --make-private /run/mount-namespaces + if [[ ! -e /run/mount-namespaces/$nn ]]; then + touch /run/mount-namespaces/$nn fi - if ! mountpoint /root/mount_namespaces/$nn >/dev/null; then + if ! mountpoint /run/mount-namespaces/$nn >/dev/null; then # Here, we specify that we only want mount changes changes under # this mountpoint to be propagated into the bind, but changes # from within the bind do not propagate to outside the bind. @@ -224,12 +245,12 @@ start() { # documentation on propagation is a bit weird because it # confusingly talks about binds, namespaces, and mirrors (which # seems to be just another name for bind), shared subtrees - # (which seems to a term for binds and namespaces), and does not + # (which seems to be a term for binds and namespaces), and does not # properly specify whether the documentation applies to binds, # namespaces, or both. Notably, propagation for binds is marked # on the original mount point, and propagation for a mount # namespace is marked on mounts within the namespace. - unshare --propagation slave --mount=/root/mount_namespaces/$nn /bin/true + unshare --propagation slave --mount=/run/mount-namespaces/$nn /bin/true fi #### end mount namespace setup #### @@ -255,7 +276,34 @@ start() { nat -C &>/dev/null || nat -A ipnn addr add $network.2/24 dev $v1 ipnn link set $v1 up - ipnn route add default via $network.1 + cmd="ipnn route add default via $network.1" + $cmd + fails=0 + max_fails=2 + # I've had adding the default route mysteriously fail on boot, so + # here we check that it succeeded, do a sleep and a retry. 
+ while true; do + default_route=$(ipnn route show default | sed -r 's,^[[:space:]]+|[[:space:]]+$,,') + if [[ $default_route != "default via $network.1 dev $v1" ]]; then + fails=$((fails + 1)) + else + break + fi + if (( fails >= max_fails )); then + echo "$0: ERROR: default route added but not found, retried $max_fails. expected route: 'default via $network.1 dev $v1', found: '$default_route'" + # Note: for debugging, if you have a systemd unit which tears down + # the newns upon failure, you may want to uncomment the break so + # that we proceed and can inspect the system. break + exit 1 + else + sleep 1 + $cmd + fi + done + if (( fails >= 1 )); then + echo "$0: WARNING: route added but not found until retried $max_fails times: $cmd" + fi + ###### begin setup resolvconf if [[ -e /run/resolvconf ]]; then # resolvconf probably installed @@ -291,19 +339,17 @@ start() { fi # end if [[ -e /run/resolvconf ]] ###### end setup resolvconf - - } stop() { + if [[ ! $network ]]; then + network=$(ipd -f inet a show dev $v0 2>/dev/null | awk '/inet / {print $2}' | sed -r 's,\.[0-9]+/.*,,' ||:) + fi if ipd link list $v0 &>/dev/null; then # this also deletes $v1 and the route we added. ipd link del $v0 fi - find_network - if ! 
$existing; then - if nat -C &>/dev/null; then nat -D; fi - fi + if [[ $network ]] && nat -C &>/dev/null; then nat -D; fi dexec iptables -D FORWARD -i $v0 -j ACCEPT &>/dev/null ||: if $create && [[ -e /var/run/netns/$nn ]]; then ip netns del $nn @@ -314,13 +360,22 @@ stop() { mexec umount /run/resolvconf fi - if mountpoint /root/mount_namespaces/$nn >/dev/null; then - umount /root/mount_namespaces/$nn + if mountpoint /run/mount-namespaces/$nn >/dev/null; then + umount /run/mount-namespaces/$nn fi } +show() { + m ipd link list $v0 + m dexec iptables -t nat -C POSTROUTING -s $network.0/24 -j MASQUERADE \ + -m comment --comment "systemd network namespace nat" ||: + m dexec iptables -C FORWARD -i $v0 -j ACCEPT + m mexec mountpoint /run/resolvconf + m mountpoint /run/mount-namespaces/$nn +} + case $action in - start|stop) + start|stop|show) $action ;; *) -- 2.30.2