X-Git-Url: https://iankelling.org/git/?p=newns;a=blobdiff_plain;f=newns;h=5075181714569a1d6e8980bb27af27e0b3eb4ddc;hp=a2d896d0b13ea33dd9121122f84e7331066ac197;hb=HEAD;hpb=6b15ac48b3f675a208d18bb267b82305eb8c8457 diff --git a/newns b/newns index a2d896d..9a7a6ff 100755 --- a/newns +++ b/newns @@ -1,5 +1,12 @@ #!/bin/bash -# Copyright (C) 2017 Ian Kelling +# I, Ian Kelling, follow the GNU license recommendations at +# https://www.gnu.org/licenses/license-recommendations.en.html. They +# recommend that small programs, < 300 lines, be licensed under the +# Apache License 2.0. This file contains or is part of one or more small +# programs. If a small program grows beyond 300 lines, I plan to switch +# its license to GPL. + +# Copyright 2024 Ian Kelling # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,40 +23,49 @@ [[ $EUID == 0 ]] || exec sudo -E "$BASH_SOURCE" "$@" -tmp="$(readlink -f "${BASH_SOURCE}")"; script_dir="${tmp%/*}" -if [[ ! $ERRHANDLE_PATH ]]; then - ERRHANDLE_PATH="$script_dir"/../errhandle/err -fi -if [[ -s $ERRHANDLE_PATH ]]; then - source $ERRHANDLE_PATH -else - cd "$script_dir" - if ! wget -O err 'https://iankelling.org/git/?p=errhandle;a=blob_plain;f=err;hb=HEAD'; then - echo "$0: failed to get errhandle dependency" >&2 - exit 1 +# https://savannah.nongnu.org/projects/bash-bear-trap/ +set -e; . /usr/local/lib/bash-bear; set +e + +m() { + local out + printf "newns: %s\n" "$*" + if ! out=$("$@" 2>&1); then + echo "newns: WARNING: last command exit code: $?" + elif [[ ! $out ]]; then + echo "newns: WARNING: no output from last command" fi - source err -fi +} usage() { cat <. EOF @@ -109,7 +123,7 @@ EOF #### begin arg parsing #### create=false -temp=$(getopt -l help,create hcn: "$@") || usage 1 +temp=$(getopt -l help,create hcdn: "$@") || usage 1 eval set -- "$temp" while true; do case $1 in @@ -143,57 +157,47 @@ if $install_error; then fi #### end sanity checking #### - v0=veth0-$nn v1=veth1-$nn ip_base=10.173 -if ! $create && [[ $(readlink /proc/self/ns/net) == "$(readlink /proc/1/ns/net)" ]]; then - create=true -fi -# make the default network namespace be named +### begin make the default network namespace be named "default" ### +mkdir -p /run/netns target=/run/netns/default if [[ ! -e $target && ! -L $target ]]; then - mkdir -p /run/netns - ln -s /proc/1/ns/net $target + # -f to avoid a race condition with running twice + ln -sf /proc/1/ns/net $target fi +### end make the default network namespace be named "default" ### - -ipd() { ip -n default "$@"; } +if ! $create && [[ $(readlink /proc/self/ns/net) == "$(readlink /proc/1/ns/net)" ]]; then + create=true +fi +# otherwise we are already in the network namespace and it's unnamed. if $create; then - # run ip in the network namespace - ipnn() { ip -n $nn "$@"; } -else - # we are already in the network namespace and it's unnamed. - # run ip in the network namespace - ipnn() { ip "$@"; } + ipnnargs="-n $nn" fi + + +ipd() { ip -n default "$@"; } + +# run ip in the network namespace +ipnn() { ip $ipnnargs "$@"; } + # default network namespace exec dexec() { ip netns exec default "$@"; } # mount namespace exec -mexec() { /usr/bin/nsenter --mount=/root/mount_namespaces/$nn "$@"; } - +mexec() { /usr/bin/nsenter --mount=/run/mount-namespaces/$nn "$@"; } -# background: head -n1 is defensive. Not sure if there is some weird feature -# for 2 routes to be 0/0. -gateway_ifs=($(ipd route list exact 0/0 | head -n1| sed -r 's/.*dev\s+(\S+).*/\1/')) - -if [[ ! $gateway_ifs ]]; then - cat >&2 </dev/null; then - existing=true - else + if ! printf "%s\n" "$ips" | grep "^${network//./\\.}" >/dev/null; then found=true break fi done -} - -start() { - find_network if ! $found; then echo "$0: error: no open network found" exit 1 fi +} + +# ip add idempotent (if it doesn't exist already) +ip-add() { + local cmd net dev + cmd=$1 + net=$2 + dev=$3 + if ! $cmd addr show dev $dev | sed 's/^ *//;s/ *$//' | grep -xF "inet $net scope global $dev"; then + $cmd addr add $net dev $dev + fi + +} + +start() { + find-network #### begin mount namespace setup #### - mkdir -p /root/mount_namespaces - if ! mountpoint /root/mount_namespaces >/dev/null; then - mount --bind /root/mount_namespaces /root/mount_namespaces + mkdir -p /run/mount-namespaces + if ! mountpoint /run/mount-namespaces >/dev/null; then + mount --bind /run/mount-namespaces /run/mount-namespaces fi # note: This is outside the mount condition because I've mysteriously # had this become shared instead of private, perhaps it # got remounted somehow and lost the setting. - mount --make-private /root/mount_namespaces - if [[ ! -e /root/mount_namespaces/$nn ]]; then - touch /root/mount_namespaces/$nn + mount --make-private /run/mount-namespaces + if [[ ! -e /run/mount-namespaces/$nn ]]; then + touch /run/mount-namespaces/$nn fi - if ! mountpoint /root/mount_namespaces/$nn >/dev/null; then + if ! mountpoint /run/mount-namespaces/$nn >/dev/null; then # Here, we specify that we only want mount changes changes under # this mountpoint to be propagated into the bind, but changes # from within the bind do not propagate to outside the bind. @@ -251,85 +264,115 @@ start() { # documentation on propagation is a bit weird because it # confusingly talks about binds, namespaces, and mirrors (which # seems to be just another name for bind), shared subtrees - # (which seems to a term for binds and namespaces), and does not + # (which seems to be a term for binds and namespaces), and does not # properly specify whether the documentation applies to binds, # namespaces, or both. Notably, propagation for binds is marked # on the original mount point, and propagation for a mount # namespace is marked on mounts within the namespace. - unshare --propagation slave --mount=/root/mount_namespaces/$nn /bin/true + unshare --propagation slave --mount=/run/mount-namespaces/$nn /bin/true fi #### end mount namespace setup #### if $create; then - ip netns add $nn + if ! ip netns | grep -xF $nn &>/dev/null; then + ip netns add $nn + fi ip -n $nn link set dev lo up fi - echo 1 | dexec dd of=/proc/sys/net/ipv4/ip_forward 2>/dev/null + echo 1 | dexec dd of=/proc/sys/net/ipv4/ip_forward status=none # docker helpfully changes the default FORWARD to drop... diptables-add FORWARD -i $v0 -j ACCEPT diptables-add FORWARD -o $v0 -j ACCEPT - _errcatch_cleanup=stop + err-cleanup() { stop; } ipnn link add $v0 type veth peer name $v1 ipnn link set $v0 netns default - ipd addr add $network.1/24 dev $v0 + ip-add ipd $network.1/24 $v0 ipd link set $v0 up nat -C &>/dev/null || nat -A - ipnn addr add $network.2/24 dev $v1 + ip-add ipnn $network.2/24 $v1 ipnn link set $v1 up - ipnn route add default via $network.1 + cmd="ipnn route add default via $network.1" + $cmd + fails=0 + max_fails=2 + # I've had adding the default route mysteriously fail on boot, so + # here we check that it succeeded, do a sleep and a retry. + while true; do + default_route=$(ipnn route show default | sed -r 's,^[[:space:]]+|[[:space:]]+$,,') + if [[ $default_route != "default via $network.1 dev $v1" ]]; then + fails=$((fails + 1)) + else + break + fi + if (( fails >= max_fails )); then + echo "$0: ERROR: default route added but not found, retried $max_fails. expected route: 'default via $network.1 dev $v1', found: '$default_route'" + # Note: for debugging, if you have a systemd unit which tears down + # the newns upon failure, you may want to uncomment the break so + # that we proceed and can inspect the system. break + exit 1 + else + sleep 1 + $cmd + fi + done + if (( fails >= 1 )); then + echo "$0: WARNING: route added but not found until retried $max_fails times: $cmd" + fi - ###### begin setup resolvconf - resolv_copy=/root/resolvconf-$nn - # this condition should never happen, just coding defensively - if mexec mountpoint /run/resolvconf &>/dev/null; then - mexec umount /run/resolvconf - fi - cp -aT /run/resolvconf $resolv_copy - if ! mexec mount -o bind $resolv_copy /run/resolvconf; then - echo "error: resolv-conf bindmount failed" - exit 1 - fi - # if running dnsmasq, we have 127.0.0.1 for dns, but it can't listen on the loopback - # in the network namespace, so adjust the address. - if mexec [ -s /run/resolvconf/interface/lo.dnsmasq ]; then - mexec sed --follow-symlinks -i "s/nameserver 127\..*/nameserver $network.1/" /run/resolvconf/interface/lo.dnsmasq - mexec resolvconf -u - fi - # and in debian based distros at least, it runs with --local-service, and needs a restart - # to know about the new local network - if [[ $(systemctl --no-pager show -p ActiveState dnsmasq ) == ActiveState=active ]]; then - systemctl restart dnsmasq - fi + ###### begin setup resolvconf + if [[ -e /run/resolvconf ]]; then # resolvconf probably installed + resolv_copy=/root/resolvconf-$nn - # background: if we did this in openvpn's resolv-conf script, we could guard it in - # if capsh --print|grep '\bcap_sys_admin\b' &>/dev/null - # and we could get $nn by - # config_basename=${config%%.*} - # config_basename=${config_basename##*/} - # but dnsmasq forces us to do it earlier. - ###### end setup resolvconf + # this condition should never happen, just coding defensively + if mexec mountpoint /run/resolvconf &>/dev/null; then + mexec umount /run/resolvconf + fi + cp -aT /run/resolvconf $resolv_copy + if ! mexec mount -o bind $resolv_copy /run/resolvconf; then + echo "error: resolv-conf bindmount failed" + exit 1 + fi + # if running dnsmasq, we have 127.0.0.1 for dns, but it can't listen on the loopback + # in the network namespace, so adjust the address. + if mexec [ -s /run/resolvconf/interface/lo.dnsmasq ]; then + mexec sed --follow-symlinks -i "s/nameserver 127\..*/nameserver $network.1/" /run/resolvconf/interface/lo.dnsmasq + mexec resolvconf -u + fi + # and in debian based distros at least, it runs with --local-service, and needs a restart + # to know about the new local network + if [[ $(systemctl --no-pager show -p ActiveState dnsmasq ) == ActiveState=active ]]; then + systemctl restart dnsmasq + fi + # background: if we did this in openvpn's resolv-conf script, we could guard it in + # if capsh --print|grep '\bcap_sys_admin\b' &>/dev/null + # and we could get $nn by + # config_basename=${config%%.*} + # config_basename=${config_basename##*/} + # but dnsmasq forces us to do it earlier. + fi # end if [[ -e /run/resolvconf ]] + ###### end setup resolvconf } stop() { + if [[ ! $network ]]; then + network=$(ipd -f inet a show dev $v0 2>/dev/null | awk '/inet / {print $2}' | sed -r 's,\.[0-9]+/.*,,' ||:) + fi if ipd link list $v0 &>/dev/null; then # this also deletes $v1 and the route we added. ipd link del $v0 fi - find_network - if ! $existing; then - if nat -C &>/dev/null; then nat -D; fi - fi - dexec iptables -D FORWARD -i $v0 -j ACCEPT ||: - if $create && [[ -e /var/run/netns/client ]]; then + if [[ $network ]] && nat -C &>/dev/null; then nat -D; fi + dexec iptables -D FORWARD -i $v0 -j ACCEPT &>/dev/null ||: + if $create && [[ -e /var/run/netns/$nn ]]; then ip netns del $nn fi @@ -338,13 +381,22 @@ stop() { mexec umount /run/resolvconf fi - if mountpoint /root/mount_namespaces/$nn >/dev/null; then - umount /root/mount_namespaces/$nn + if mountpoint /run/mount-namespaces/$nn >/dev/null; then + umount /run/mount-namespaces/$nn fi } +show() { + m ipd link list $v0 + m dexec iptables -t nat -C POSTROUTING -s $network.0/24 -j MASQUERADE \ + -m comment --comment "systemd network namespace nat" ||: + m dexec iptables -C FORWARD -i $v0 -j ACCEPT + m mexec mountpoint /run/resolvconf + m mountpoint /run/mount-namespaces/$nn +} + case $action in - start|stop) + start|stop|show) $action ;; *)