X-Git-Url: https://iankelling.org/git/?a=blobdiff_plain;f=newns;h=ad4c06c437c7b96d85232b41293ba48e19cb78d1;hb=61d6fbfb34acb15c65fe71ef9f3c34592a272fa6;hp=4fc887c6f038765d368d82286b763cd071b1cb0f;hpb=633e865aec9974d560bc7b6e3e526713ed8b41e9;p=newns diff --git a/newns b/newns index 4fc887c..ad4c06c 100755 --- a/newns +++ b/newns @@ -14,137 +14,334 @@ # limitations under the License. -# Create a network namespace. Designed for use from systemd. - [[ $EUID == 0 ]] || exec sudo -E "$BASH_SOURCE" "$@" -cd "${BASH_SOURCE%/*}" -source ../errhandle/errcatch-function -source ../errhandle/bash-trace-function -errcatch +if [[ ! $ERRHANDLE_PATH ]]; then + ERRHANDLE_PATH=$(readlink -f "${BASH_SOURCE}") + ERRHANDLE_PATH=$(readlink -f ${ERRHANDLE_PATH%/*}/../errhandle) +fi +err_sourced=true +for p in $ERRHANDLE_PATH/{errcatch-function,bash-trace-function}; do + if [[ -e $p ]]; then + source $p + else + err_sourced=false + fi +done +if $err_sourced; then + errcatch +else + set -eE -o pipefail + trap 'echo "$0:$LINENO:error: \"$BASH_COMMAND\" returned $?" >&2' ERR +fi + +usage() { + cat <. +EOF + exit ${1:-0} +} + + +#### begin arg parsing #### +create=false +temp=$(getopt -l help,create hcn: "$@") || usage 1 +eval set -- "$temp" +while true; do + case $1 in + -c|--create) create=true; shift ;; + -n) network=$2; shift 2 ;; + -h|--help) usage ;; + --) shift; break ;; + *) echo "$0: Internal error!" ; exit 1 ;; + esac +done +if (( $# != 2 )); then + usage 1 +fi + +action=$1 +nn=$2 # namespace name +#### end arg parsing #### +#### begin sanity checking #### install_error=false if ! type -p ip &>/dev/null; then - echo "please install the iproute2 package" - install_error=true + echo "please install the iproute2 package" + install_error=true fi if ! type -p iptables &>/dev/null; then - echo "please install the iptables package" - install_error=true + echo "please install the iptables package" + install_error=true fi if $install_error; then - exit 1 + exit 1 fi - -## end sanity checking ## +#### end sanity checking #### v0=veth0-$nn v1=veth1-$nn ip_base=10.173 +if ! $create && [[ $(readlink /proc/self/ns/net) == "$(readlink /proc/1/ns/net)" ]]; then + create=true +fi +# make the default network namespace be named target=/run/netns/default if [[ ! -e $target && ! -L $target ]]; then - mkdir -p /run/netns - # make the default network namespace be named - ln -s /proc/1/ns/net $target + mkdir -p /run/netns + ln -s /proc/1/ns/net $target fi ipd() { ip -n default "$@"; } +if $create; then + # run ip in the network namespace + ipnn() { ip -n $nn "$@"; } +else + # we are already in the network namespace and it's unnamed. + # run ip in the network namespace + ipnn() { ip "$@"; } +fi +# default network namespace exec dexec() { ip netns exec default "$@"; } +# mount namespace exec +mexec() { /usr/bin/nsenter --mount=/root/mount_namespaces/$nn "$@"; } + + +# background: head -n1 is defensive. Not sure if there is some weird feature +# for 2 routes to be 0/0. +gateway_ifs=($(ipd route list exact 0/0 | head -n1| sed -r 's/.*dev\s+(\S+).*/\1/')) + +if [[ ! $gateway_ifs ]]; then + cat >&2 </dev/null; then + dexec iptables -I "$@" + fi -# head -n1 is defensive. I don't know if it's possible to have more -# than one default route. -gateway_if=$(ipd route list exact 0/0 | head -n1| sed -r 's/.*\s(\S+)\s*$/\1/') -nat() { dexec iptables -t nat $1 POSTROUTING -o $gateway_if -j MASQUERADE \ - -m comment --comment "systemd network namespace nat"; } +} find_network() { - found=false - existing=false - ips="$(ipd addr show | awk '$1 == "inet" {print $2}')" - for ((i=0; i <= 254; i++)); do - network=$ip_base.$i - if printf "%s\n" "$ips" | grep "^${network//./\\.}"; then - existing=true - else - found=true - break - fi - done + if [[ $network ]]; then + return + fi + found=false + existing=false + ips="$(ipd addr show | awk '$1 == "inet" {print $2}')" + for ((i=1; i <= 254; i++)); do + network=$ip_base.$i + if printf "%s\n" "$ips" | grep "^${network//./\\.}" >/dev/null; then + existing=true + else + found=true + break + fi + done } start() { + find_network + if ! $found; then + echo "$0: error: no open network found" + exit 1 + fi + + #### begin mount namespace setup #### + mkdir -p /root/mount_namespaces + if ! mountpoint /root/mount_namespaces >/dev/null; then + mount --bind /root/mount_namespaces /root/mount_namespaces + fi + # note: This is outside the mount condition because I've mysteriously + # had this become shared instead of private, perhaps it + # got remounted somehow and lost the setting. + mount --make-private /root/mount_namespaces + if [[ ! -e /root/mount_namespaces/$nn ]]; then + touch /root/mount_namespaces/$nn + fi + if ! mountpoint /root/mount_namespaces/$nn >/dev/null; then + # Here, we specify that we only want mount changes changes under + # this mountpoint to be propagated into the bind, but changes + # from within the bind do not propagate to outside the bind. + # + # slave is documented in. + # /usr/share/doc/linux-doc-4.9/Documentation/filesystems/sharedsubtree.txt.gz + # documentation on propagation is a bit weird because it + # confusingly talks about binds, namespaces, and mirrors (which + # seems to be just another name for bind), shared subtrees + # (which seems to a term for binds and namespaces), and does not + # properly specify whether the documentation applies to binds, + # namespaces, or both. Notably, propagation for binds is marked + # on the original mount point, and propagation for a mount + # namespace is marked on mounts within the namespace. + unshare --propagation slave --mount=/root/mount_namespaces/$nn /bin/true + fi + + #### end mount namespace setup #### + + + if $create; then + ip netns add $nn + ip -n $nn link set dev lo up + fi + + echo 1 | dexec dd of=/proc/sys/net/ipv4/ip_forward 2>/dev/null + + # docker helpfully changes the default FORWARD to drop... + diptables-add FORWARD -i $v0 -j ACCEPT + diptables-add FORWARD -o $v0 -j ACCEPT + + + _errcatch_cleanup=stop + ipnn link add $v0 type veth peer name $v1 + ipnn link set $v0 netns default + ipd addr add $network.1/24 dev $v0 + ipd link set $v0 up + nat -C &>/dev/null || nat -A + ipnn addr add $network.2/24 dev $v1 + ipnn link set $v1 up + ipnn route add default via $network.1 + + ###### begin setup resolvconf + resolv_copy=/root/resolvconf-$nn + + # this condition should never happen, just coding defensively + if mexec mountpoint /run/resolvconf &>/dev/null; then + mexec umount /run/resolvconf + fi + cp -aT /run/resolvconf $resolv_copy + if ! mexec mount -o bind $resolv_copy /run/resolvconf; then + echo "error: resolv-conf bindmount failed" + exit 1 + fi + # if running dnsmasq, we have 127.0.0.1 for dns, but it can't listen on the loopback + # in the network namespace, so adjust the address. + if mexec [ -s /run/resolvconf/interface/lo.dnsmasq ]; then + mexec sed --follow-symlinks -i "s/nameserver 127\..*/nameserver $network.1/" /run/resolvconf/interface/lo.dnsmasq + mexec resolvconf -u + fi + # and in debian based distros at least, it runs with --local-service, and needs a restart + # to know about the new local network + if [[ $(systemctl --no-pager show -p ActiveState dnsmasq ) == ActiveState=active ]]; then + systemctl restart dnsmasq + fi + + # background: if we did this in openvpn's resolv-conf script, we could guard it in + # if capsh --print|grep '\bcap_sys_admin\b' &>/dev/null + # and we could get $nn by + # config_basename=${config%%.*} + # config_basename=${config_basename##*/} + # but dnsmasq forces us to do it earlier. + ###### end setup resolvconf - mkdir -p /root/mount_namespaces - if ! mountpoint /root/mount_namespaces >/dev/null; then - mount --bind /root/mount_namespaces /root/mount_namespaces - mount --make-private /root/mount_namespaces - fi - if [[ ! -e /root/mount_namespaces/$nn ]]; then - touch /root/mount_namespaces/$nn - fi - if ! mountpoint /root/mount_namespaces/$nn >/dev/null; then - unshare --mount=/root/mount_namespaces/$nn - fi - - - - find_network - if ! $found; then - echo "$0: error: no open network found" - exit 1 - fi - - echo 1 | dexec dd of=/proc/sys/net/ipv4/ip_forward 2>/dev/null - - _errcatch_cleanup=stop - ip link add $v0 type veth peer name $v1 - ip link set $v0 netns default - ipd addr add $network.1/24 dev $v0 - ipd link set $v0 up - nat -C &>/dev/null || nat -A - ip addr add $network.2/24 dev $v1 - ip link set $v1 up - ip route add default via $network.1 } stop() { - if ipd link list $v0 &>/dev/null; then - # this also deletes $v1 and the route we added. - ipd link del $v0 - fi - find_network - if ! $existing; then - if nat -C &>/dev/null; then nat -D; fi - fi + if ipd link list $v0 &>/dev/null; then + # this also deletes $v1 and the route we added. + ipd link del $v0 + fi + find_network + if ! $existing; then + if nat -C &>/dev/null; then nat -D; fi + fi + dexec iptables -D FORWARD -i $v0 -j ACCEPT ||: + if $create && [[ -e /var/run/netns/client ]]; then + ip netns del $nn + fi + + # not sure this is necessary since we are tearing down the mount namespace + if mexec mountpoint /run/resolvconf &>/dev/null; then + mexec umount /run/resolvconf + fi + + if mountpoint /root/mount_namespaces/$nn >/dev/null; then + umount /root/mount_namespaces/$nn + fi } case $action in - start|stop) - $action - ;; - *) - echo "$0: error: unsupported action" - exit 1 - ;; + start|stop) + $action + ;; + *) + echo "$0: error: unsupported action" + exit 1 + ;; esac