#!/bin/bash
# I, Ian Kelling, follow the GNU license recommendations at
# https://www.gnu.org/licenses/license-recommendations.en.html. They
# recommend that small programs, < 300 lines, be licensed under the
# Apache License 2.0. This file contains or is part of one or more small
# programs. If a small program grows beyond 300 lines, I plan to switch
# its license to GPL.

# Copyright 2024 Ian Kelling

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Re-exec as root, keeping the caller's environment.
[[ $EUID == 0 ]] || exec sudo -E "$BASH_SOURCE" "$@"

# https://savannah.nongnu.org/projects/bash-bear-trap/
set -e; . /usr/local/lib/bash-bear; set +e

# Run a command for show(): print the command line, print whatever the
# command wrote (stdout and stderr), then warn if it failed or was
# silent. Always returns 0 so that a failed check does not abort show().
# Arguments: the command and its arguments.
m() {
  local out ret=0
  printf "newns: %s\n" "$*"
  # Capture the status here: inside "if ! cmd; then", $? is the status
  # of the negation (always 0), not of the command.
  out=$("$@" 2>&1) || ret=$?
  if [[ $out ]]; then
    printf "%s\n" "$out"
  fi
  if (( ret != 0 )); then
    echo "newns: WARNING: last command exit code: $ret"
  elif [[ ! $out ]]; then
    echo "newns: WARNING: no output from last command"
  fi
}

# Print help and exit.
# Arguments: $1 - exit status, default 0.
# NOTE(review): the original help text was unreadable in the copy this
# was reviewed from; the text below is derived from the option parsing
# and the action dispatch in this file. Please confirm the wording.
usage() {
  cat <<EOF
Usage: ${0##*/} [OPTIONS] start|stop|show NAME

Set up (start), tear down (stop) or inspect (show) a network namespace
which reaches the outside through a veth pair to the default network
namespace plus a NAT rule, along with a persistent mount namespace at
/run/mount-namespaces/NAME.

-c|--create   Create the named network namespace NAME on start and
              delete it on stop. Implied when run from the same network
              namespace as pid 1. Otherwise, the current (unnamed)
              network namespace is used.
-n NETWORK    First three octets of the /24 network to use for the veth
              pair, for example 10.173.5. Default: the first 10.173.X
              which is not in use in the default network namespace.
-h|--help     Print this help and exit.
EOF
  exit "${1:-0}"
}

#### begin arg parsing ####

create=false
# Only options which have a case arm below are accepted, so anything
# else is rejected by getopt and leads to usage.
temp=$(getopt -l help,create -o hcn: -- "$@") || usage 1
eval set -- "$temp"
while true; do
  case $1 in
    -c|--create) create=true; shift ;;
    -n) network=$2; shift 2 ;;
    -h|--help) usage ;;
    --) shift; break ;;
    *) echo "$0: Internal error!" ; exit 1 ;;
  esac
done

if (( $# != 2 )); then
  usage 1
fi

action=$1
nn=$2 # namespace name

#### end arg parsing ####

#### begin sanity checking ####
install_error=false
if ! type -p ip &>/dev/null; then
  echo "please install the iproute2 package"
  install_error=true
fi
if ! type -p iptables &>/dev/null; then
  echo "please install the iptables package"
  install_error=true
fi
if $install_error; then
  exit 1
fi
#### end sanity checking ####

v0=veth0-$nn
v1=veth1-$nn
ip_base=10.173

### begin make the default network namespace be named "default" ###
mkdir -p /run/netns
target=/run/netns/default
if [[ ! -e $target && ! -L $target ]]; then
  # -f to avoid a race condition with running twice
  ln -sf /proc/1/ns/net "$target"
fi
### end make the default network namespace be named "default" ###

if ! $create && [[ $(readlink /proc/self/ns/net) == "$(readlink /proc/1/ns/net)" ]]; then
  create=true
fi

# Extra arguments for ip to select the network namespace. Empty when
# not creating: then we are already in the network namespace and it's
# unnamed.
ipnn_args=()
if $create; then
  ipnn_args=(-n "$nn")
fi

# run ip in the default network namespace
ipd() { ip -n default "$@"; }
# run ip in the network namespace
ipnn() { ip "${ipnn_args[@]}" "$@"; }
# default network namespace exec
dexec() { ip netns exec default "$@"; }
# mount namespace exec
mexec() { /usr/bin/nsenter --mount="/run/mount-namespaces/$nn" "$@"; }

# Check, add or delete the masquerade rule for $network in the default
# network namespace.
# Arguments: $1 - iptables command option: -C, -A or -D.
# Note: duplicated in show()
nat() {
  # Note, in a previous commit i specified the output interface with -o,
  # but that broke things when my gateway interface changed, and I can't
  # see any advantage to it, so I removed it.
  dexec iptables -t nat "$1" POSTROUTING -s "$network.0/24" -j MASQUERADE \
    -m comment --comment "systemd network namespace nat"
}

# Insert an iptables rule in the default (d) network namespace unless
# it already exists.
# Arguments: chain followed by the rule specification.
diptables-add() {
  if ! dexec iptables -C "$@" &>/dev/null; then
    dexec iptables -I "$@"
  fi
}

# Set the global $network to the first $ip_base.1 - $ip_base.254 which
# has no address in the default network namespace. Does nothing if
# $network is already set (by -n). Exits 1 if all are taken.
find-network() {
  local found ips i pattern
  if [[ $network ]]; then
    return
  fi
  found=false
  ips="$(ipd addr show | awk '$1 == "inet" {print $2}')"
  for ((i=1; i <= 254; i++)); do
    network=$ip_base.$i
    # The trailing literal dot keeps a candidate like 10.173.1 from
    # being considered in use just because 10.173.10.x exists.
    pattern="^${network//./\\.}\\."
    if ! printf "%s\n" "$ips" | grep -- "$pattern" >/dev/null; then
      found=true
      break
    fi
  done
  if ! $found; then
    echo "$0: error: no open network found"
    exit 1
  fi
}

# Print the first three octets of the IPv4 address(es) found on $v0 in
# the default network namespace. Prints nothing if there are none.
# Always returns 0.
v0-network() {
  ipd -f inet a show dev "$v0" 2>/dev/null \
    | awk '/inet / {print $2}' \
    | sed -r 's,\.[0-9]+/.*,,' ||:
}

# ip add idempotent (if it doesn't exist already)
# Arguments:
#   $1 - function which runs ip in the wanted namespace: ipd or ipnn
#   $2 - address with prefix length, eg 10.173.1.1/24
#   $3 - device name
ip-add() {
  local cmd net dev
  cmd=$1
  net=$2
  dev=$3
  if ! "$cmd" addr show dev "$dev" | sed 's/^ *//;s/ *$//' \
      | grep -xF -- "inet $net scope global $dev" >/dev/null; then
    "$cmd" addr add "$net" dev "$dev"
  fi
}

# Set up the mount namespace, the network namespace (if creating), the
# veth pair, forwarding, NAT, the default route and, if /run/resolvconf
# exists, a private copy of it inside the mount namespace.
start() {
  local -a route_cmd
  local fails max_fails default_route resolv_copy

  find-network

  #### begin mount namespace setup ####
  mkdir -p /run/mount-namespaces
  if ! mountpoint /run/mount-namespaces >/dev/null; then
    mount --bind /run/mount-namespaces /run/mount-namespaces
  fi
  # note: This is outside the mount condition because I've mysteriously
  # had this become shared instead of private, perhaps it
  # got remounted somehow and lost the setting.
  mount --make-private /run/mount-namespaces
  if [[ ! -e /run/mount-namespaces/$nn ]]; then
    touch "/run/mount-namespaces/$nn"
  fi
  if ! mountpoint "/run/mount-namespaces/$nn" >/dev/null; then
    # Here, we specify that we only want mount changes under
    # this mountpoint to be propagated into the bind, but changes
    # from within the bind do not propagate to outside the bind.
    #
    # slave is documented in
    # /usr/share/doc/linux-doc-4.9/Documentation/filesystems/sharedsubtree.txt.gz
    # documentation on propagation is a bit weird because it
    # confusingly talks about binds, namespaces, and mirrors (which
    # seems to be just another name for bind), shared subtrees
    # (which seems to be a term for binds and namespaces), and does not
    # properly specify whether the documentation applies to binds,
    # namespaces, or both. Notably, propagation for binds is marked
    # on the original mount point, and propagation for a mount
    # namespace is marked on mounts within the namespace.
    unshare --propagation slave --mount="/run/mount-namespaces/$nn" /bin/true
  fi
  #### end mount namespace setup ####

  if $create; then
    # ip netns list prints "NAME (id: N)" once the namespace has an id
    # assigned, so only the first field is compared against the name.
    if ! ip netns list | awk '{print $1}' | grep -xF -- "$nn" &>/dev/null; then
      ip netns add "$nn"
    fi
    ip -n "$nn" link set dev lo up
  fi

  echo 1 | dexec dd of=/proc/sys/net/ipv4/ip_forward status=none

  # docker helpfully changes the default FORWARD to drop...
  diptables-add FORWARD -i "$v0" -j ACCEPT
  diptables-add FORWARD -o "$v0" -j ACCEPT

  # From here on, tear down what we set up if a command fails.
  # NOTE(review): presumably this is the hook called by bash-bear on
  # error; confirm against bash-bear.
  err-cleanup() { stop; }

  ipnn link add "$v0" type veth peer name "$v1"
  ipnn link set "$v0" netns default
  ip-add ipd "$network.1/24" "$v0"
  ipd link set "$v0" up
  nat -C &>/dev/null || nat -A

  ip-add ipnn "$network.2/24" "$v1"
  ipnn link set "$v1" up

  route_cmd=(ipnn route add default via "$network.1")
  "${route_cmd[@]}"
  fails=0
  max_fails=2
  # I've had adding the default route mysteriously fail on boot, so
  # here we check that it succeeded, do a sleep and a retry.
  while true; do
    default_route=$(ipnn route show default | sed -r 's,^[[:space:]]+|[[:space:]]+$,,')
    if [[ $default_route != "default via $network.1 dev $v1" ]]; then
      fails=$((fails + 1))
    else
      break
    fi
    if (( fails >= max_fails )); then
      echo "$0: ERROR: default route added but not found, retried $max_fails. expected route: 'default via $network.1 dev $v1', found: '$default_route'"
      # Note: for debugging, if you have a systemd unit which tears down
      # the newns upon failure, you may want to uncomment the break so
      # that we proceed and can inspect the system.
      #break
      exit 1
    else
      sleep 1
      "${route_cmd[@]}"
    fi
  done
  if (( fails >= 1 )); then
    echo "$0: WARNING: route added but not found until retried $fails times: ${route_cmd[*]}"
  fi

  ###### begin setup resolvconf
  if [[ -e /run/resolvconf ]]; then # resolvconf probably installed
    resolv_copy=/root/resolvconf-$nn
    # this condition should never happen, just coding defensively
    if mexec mountpoint /run/resolvconf &>/dev/null; then
      mexec umount /run/resolvconf
    fi
    cp -aT /run/resolvconf "$resolv_copy"
    if ! mexec mount -o bind "$resolv_copy" /run/resolvconf; then
      echo "error: resolv-conf bindmount failed"
      exit 1
    fi

    # if running dnsmasq, we have 127.0.0.1 for dns, but it can't listen on the loopback
    # in the network namespace, so adjust the address.
    if mexec [ -s /run/resolvconf/interface/lo.dnsmasq ]; then
      mexec sed --follow-symlinks -i "s/nameserver 127\..*/nameserver $network.1/" /run/resolvconf/interface/lo.dnsmasq
      mexec resolvconf -u
    fi
    # and in debian based distros at least, it runs with --local-service, and needs a restart
    # to know about the new local network
    if [[ $(systemctl --no-pager show -p ActiveState dnsmasq ) == ActiveState=active ]]; then
      systemctl restart dnsmasq
    fi

    # background: if we did this in openvpn's resolv-conf script, we could guard it in
    # if capsh --print|grep '\bcap_sys_admin\b' &>/dev/null
    # and we could get $nn by
    # config_basename=${config%%.*}
    # config_basename=${config_basename##*/}
    # but dnsmasq forces us to do it earlier.
  fi # end if [[ -e /run/resolvconf ]]
  ###### end setup resolvconf
}

# Undo start(): remove the veth pair, the NAT rule, both FORWARD rules,
# the named network namespace (if creating), and the mounts.
stop() {
  if [[ ! $network ]]; then
    network=$(v0-network)
  fi

  if ipd link list "$v0" &>/dev/null; then
    # this also deletes $v1 and the route we added.
    ipd link del "$v0"
  fi

  if [[ $network ]] && nat -C &>/dev/null; then
    nat -D
  fi
  # start() adds an -i and an -o rule, so remove both.
  dexec iptables -D FORWARD -i "$v0" -j ACCEPT &>/dev/null ||:
  dexec iptables -D FORWARD -o "$v0" -j ACCEPT &>/dev/null ||:

  if $create && [[ -e /var/run/netns/$nn ]]; then
    ip netns del "$nn"
  fi

  # not sure this is necessary since we are tearing down the mount namespace
  if mexec mountpoint /run/resolvconf &>/dev/null; then
    mexec umount /run/resolvconf
  fi
  if mountpoint "/run/mount-namespaces/$nn" >/dev/null; then
    umount "/run/mount-namespaces/$nn"
  fi
}

# Check each piece that start() sets up, printing the command, its
# output and a warning for anything which fails or prints nothing.
show() {
  # Without -n, find the network from the existing interface like
  # stop() does, otherwise the nat check would look for ".0/24".
  if [[ ! $network ]]; then
    network=$(v0-network)
  fi
  m ipd link list "$v0"
  m dexec iptables -t nat -C POSTROUTING -s "$network.0/24" -j MASQUERADE \
    -m comment --comment "systemd network namespace nat" ||:
  m dexec iptables -C FORWARD -i "$v0" -j ACCEPT
  m dexec iptables -C FORWARD -o "$v0" -j ACCEPT
  m mexec mountpoint /run/resolvconf
  m mountpoint "/run/mount-namespaces/$nn"
}

case $action in
  start|stop|show)
    $action
    ;;
  *)
    echo "$0: error: unsupported action"
    exit 1
    ;;
esac