X-Git-Url: https://iankelling.org/git/?a=blobdiff_plain;f=newns;h=cd2466b7b371469a8d1f7c478298ef0fea370b91;hb=b0239f31ed6ba299fbbde710c3ebb080bd5f8c12;hp=aff500b5ecc0f0795121ac05a7c910aafec53bad;hpb=c3e7838c3eb98ddb1db59b52aa3b6fca9cd7f643;p=newns diff --git a/newns b/newns index aff500b..cd2466b 100755 --- a/newns +++ b/newns @@ -37,48 +37,77 @@ fi usage() { cat <. EOF exit ${1:-0} } -## begin arg parsing ## +#### begin arg parsing #### create=false -temp=$(getopt -l help,create hc "$@") || usage 1 +temp=$(getopt -l help,create hcn: "$@") || usage 1 eval set -- "$temp" while true; do case $1 in -c|--create) create=true; shift ;; + -n) network=$2; shift 2 ;; -h|--help) usage ;; --) shift; break ;; *) echo "$0: Internal error!" ; exit 1 ;; @@ -89,11 +118,10 @@ if (( $# != 2 )); then fi action=$1 -nn=$2 # network namespace / namespace name -## end arg parsing ## - -## begin sanity checking ## +nn=$2 # namespace name +#### end arg parsing #### +#### begin sanity checking #### install_error=false if ! type -p ip &>/dev/null; then echo "please install the iproute2 package" @@ -106,8 +134,7 @@ fi if $install_error; then exit 1 fi - -## end sanity checking ## +#### end sanity checking #### v0=veth0-$nn @@ -118,35 +145,62 @@ if ! $create && [[ $(readlink /proc/self/ns/net) == "$(readlink /proc/1/ns/net)" create=true fi +# make the default network namespace be named target=/run/netns/default if [[ ! -e $target && ! -L $target ]]; then mkdir -p /run/netns - # make the default network namespace be named ln -s /proc/1/ns/net $target fi ipd() { ip -n default "$@"; } if $create; then + # run ip in the network namespace ipnn() { ip -n $nn "$@"; } else # we are already in the network namespace and it's unnamed. + # run ip in the network namespace ipnn() { ip "$@"; } fi +# default network namespace exec dexec() { ip netns exec default "$@"; } +# mount namespace exec +mexec() { /usr/bin/nsenter --mount=/root/mount_namespaces/$nn "$@"; } -# head -n1 is defensive. Not sure if there is some weird feature +# background: head -n1 is defensive. Not sure if there is some weird feature # for 2 routes to be 0/0. -gateway_if=$(ipd route list exact 0/0 | head -n1| sed -r 's/.*\s(\S+)\s*$/\1/') +gateway_if=$(ipd route list exact 0/0 | head -n1| sed -r 's/.*dev\s+(\S+).*/\1/') + +if [[ ! $gateway_if ]]; then + cat >&2 </dev/null; then + dexec iptables -I "$@" + fi + +} + find_network() { + if [[ $network ]]; then + return + fi found=false existing=false ips="$(ipd addr show | awk '$1 == "inet" {print $2}')" - for ((i=0; i <= 254; i++)); do + for ((i=1; i <= 254; i++)); do network=$ip_base.$i if printf "%s\n" "$ips" | grep "^${network//./\\.}" >/dev/null; then existing=true @@ -158,34 +212,56 @@ find_network() { } start() { - find_network if ! $found; then echo "$0: error: no open network found" exit 1 fi + #### begin mount namespace setup #### mkdir -p /root/mount_namespaces if ! mountpoint /root/mount_namespaces >/dev/null; then mount --bind /root/mount_namespaces /root/mount_namespaces - mount --make-private /root/mount_namespaces fi + # note: This is outside the mount condition because I've mysteriously + # had this become shared instead of private, perhaps it + # got remounted somehow and lost the setting. + mount --make-private /root/mount_namespaces if [[ ! -e /root/mount_namespaces/$nn ]]; then touch /root/mount_namespaces/$nn fi if ! mountpoint /root/mount_namespaces/$nn >/dev/null; then - unshare --mount=/root/mount_namespaces/$nn + # Here, we specify that we only want mount changes changes under + # this mountpoint to be propagated into the bind, but changes + # from within the bind do not propagate to outside the bind. + # + # slave is documented in. + # /usr/share/doc/linux-doc-4.9/Documentation/filesystems/sharedsubtree.txt.gz + # documentation on propagation is a bit weird because it + # confusingly talks about binds, namespaces, and mirrors (which + # seems to be just another name for bind), shared subtrees + # (which seems to a term for binds and namespaces), and does not + # properly specify whether the documentation applies to binds, + # namespaces, or both. Notably, propagation for binds is marked + # on the original mount point, and propagation for a mount + # namespace is marked on mounts within the namespace. + unshare --propagation slave --mount=/root/mount_namespaces/$nn /bin/true fi + #### end mount namespace setup #### + if $create; then ip netns add $nn ip -n $nn link set dev lo up fi + echo 1 | dexec dd of=/proc/sys/net/ipv4/ip_forward 2>/dev/null + # docker helpfully changes the default FORWARD to drop... + diptables-add FORWARD -i $v0 -j ACCEPT + diptables-add FORWARD -o $v0 -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT - echo 1 | dexec dd of=/proc/sys/net/ipv4/ip_forward 2>/dev/null _errcatch_cleanup=stop ipnn link add $v0 type veth peer name $v1 @@ -197,6 +273,33 @@ start() { ipnn link set $v1 up ipnn route add default via $network.1 + ###### begin setup resolvconf + resolv_copy=/root/resolvconf-$nn + + # this condition should never happen, just coding defensively + if mexec mountpoint /run/resolvconf &>/dev/null; then + mexec umount /run/resolvconf + fi + cp -aT /run/resolvconf $resolv_copy + if ! mexec mount -o bind $resolv_copy /run/resolvconf; then + echo "error: resolv-conf bindmount failed" + exit 1 + fi + # if running dnsmasq, we have 127.0.0.1 for dns, but it can't listen on the loopback + # in the network namespace, so adjust the address. + if mexec [ -s /run/resolvconf/interface/lo.dnsmasq ]; then + mexec sed --follow-symlinks -i "s/nameserver 127\..*/nameserver $network.1/" /run/resolvconf/interface/lo.dnsmasq + mexec resolvconf -u + fi + # background: if we did this in openvpn's resolv-conf script, we could guard it in + # if capsh --print|grep '\bcap_sys_admin\b' &>/dev/null + # and we could get $nn by + # config_basename=${config%%.*} + # config_basename=${config_basename##*/} + # but dnsmasq forces us to do it earlier. + ###### end setup resolvconf + + } stop() { @@ -208,9 +311,16 @@ stop() { if ! $existing; then if nat -C &>/dev/null; then nat -D; fi fi - if $create; then + dexec iptables -D FORWARD -i $v0 -j ACCEPT ||: + if $create && [[ -e /var/run/netns/client ]]; then ip netns del $nn fi + + # not sure this is necessary since we are tearing down the mount namespace + if mexec mountpoint /run/resolvconf &>/dev/null; then + mexec umount /run/resolvconf + fi + if mountpoint /root/mount_namespaces/$nn >/dev/null; then umount /root/mount_namespaces/$nn fi