#!/bin/bash
# Copyright (C) 2017 Ian Kelling

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Purpose: start or stop a network namespace that reaches the outside
# world through a veth pair whose other end lives in the default network
# namespace, plus a persistent mount namespace under
# /root/mount_namespaces/NAME that carries its own copy of
# /run/resolvconf.
#
# Must run as root; re-executes itself through sudo otherwise.

[[ $EUID == 0 ]] || exec sudo -E "${BASH_SOURCE[0]}" "$@"

tmp="$(readlink -f "${BASH_SOURCE[0]}")"
script_dir="${tmp%/*}"

# Load the error handling library: from $ERRHANDLE_PATH when set, from a
# sibling checkout otherwise, and as a last resort by downloading it.
if [[ ! $ERRHANDLE_PATH ]]; then
  ERRHANDLE_PATH="$script_dir"/../errhandle/err
fi
if [[ -s $ERRHANDLE_PATH ]]; then
  source "$ERRHANDLE_PATH"
else
  cd "$script_dir" || exit 1
  # NOTE(review): this downloads code and sources it as root; consider
  # pinning a revision or verifying a checksum.
  if ! wget -O err 'https://iankelling.org/git/?p=errhandle;a=blob_plain;f=err;hb=HEAD'; then
    echo "$0: failed to get errhandle dependency" >&2
    exit 1
  fi
  # ./ so that `source` does not search $PATH for a file named "err".
  source ./err
fi

# Print usage and exit.
# Arguments: $1 - exit status (default 0)
#
# NOTE(review): the original help text was lost in extraction (only a
# trailing "." survived). The text below is reconstructed from the
# option parsing code; restore the upstream wording if available.
usage() {
  cat <<EOF
Usage: ${0##*/} [OPTIONS] start|stop NETNS_NAME

Set up or tear down the network namespace NETNS_NAME.

Options:
  -c, --create   Create (on start) / delete (on stop) the named network
                 namespace. Implied when run from the default network
                 namespace.
  -n NETWORK     First three octets of the /24 to use, e.g. 10.173.5.
                 Default: the first unused 10.173.N.
  -h, --help     Print this help and exit.
EOF
  exit "${1:-0}"
}

#### begin arg parsing ####

create=false
temp=$(getopt -l help,create hcn: "$@") || usage 1
eval set -- "$temp"
while true; do
  case $1 in
    -c|--create) create=true; shift ;;
    -n) network=$2; shift 2 ;;
    -h|--help) usage ;;
    --) shift; break ;;
    *) echo "$0: Internal error!" >&2; exit 1 ;;
  esac
done

if (( $# != 2 )); then
  usage 1
fi

action=$1
nn=$2 # namespace name

#### end arg parsing ####

#### begin sanity checking ####
install_error=false
if ! type -p ip &>/dev/null; then
  echo "please install the iproute2 package" >&2
  install_error=true
fi
if ! type -p iptables &>/dev/null; then
  echo "please install the iptables package" >&2
  install_error=true
fi
if $install_error; then
  exit 1
fi
#### end sanity checking ####

v0=veth0-$nn   # end of the veth pair that is moved to the default namespace
v1=veth1-$nn   # end of the veth pair that stays in namespace $nn
ip_base=10.173
mount_ns_dir=/root/mount_namespaces

# If we are running in the same network namespace as pid 1, there is no
# pre-existing namespace to configure, so one has to be created.
if ! $create && [[ $(readlink /proc/self/ns/net) == "$(readlink /proc/1/ns/net)" ]]; then
  create=true
fi

# make the default network namespace be named
target=/run/netns/default
if [[ ! -e $target && ! -L $target ]]; then
  mkdir -p /run/netns
  ln -s /proc/1/ns/net "$target"
fi

# Run ip in the default network namespace.
ipd() { ip -n default "$@"; }

if $create; then
  # run ip in the network namespace
  ipnn() { ip -n "$nn" "$@"; }
else
  # we are already in the network namespace and it's unnamed.
  # run ip in the network namespace
  ipnn() { ip "$@"; }
fi

# default network namespace exec
dexec() { ip netns exec default "$@"; }
# mount namespace exec
mexec() { /usr/bin/nsenter --mount="$mount_ns_dir/$nn" "$@"; }

# background: head -n1 is defensive. Not sure if there is some weird feature
# for 2 routes to be 0/0.
mapfile -t gateway_ifs < <(ipd route list exact 0/0 | head -n1 \
  | sed -r 's/.*dev\s+(\S+).*/\1/')

if [[ ! ${gateway_ifs[0]} ]]; then
  # NOTE(review): the original wording of this message was lost in
  # extraction; restore the upstream text if available.
  cat >&2 <<EOF
$0: error: could not find the interface of the default route
in the default network namespace.
EOF
  exit 1
fi

# Check / add / delete the NAT rule for the namespace networks.
# Arguments: $1 - iptables action flag: -C, -A or -D
#
# NOTE(review): the original definition of nat() was lost in extraction.
# This one is reconstructed from its call sites (nat -C / -A / -D) and
# from the fact that stop() only removes the rule when no other
# $ip_base network remains, which implies one rule shared by all
# namespaces. Confirm the exact rule against upstream.
nat() {
  local gateway_if
  for gateway_if in "${gateway_ifs[@]}"; do
    dexec iptables -t nat "$1" POSTROUTING -s "$ip_base.0.0/16" \
      -o "$gateway_if" -j MASQUERADE
  done
}

# Insert an iptables rule in the default namespace unless it already exists.
# Arguments: the rule, e.g. FORWARD -i IFACE -j ACCEPT
#
# NOTE(review): the head of this function (the -C existence check) was
# lost in extraction and is reconstructed; the -I insertion is original.
diptables-add() {
  if ! dexec iptables -C "$@" &>/dev/null; then
    dexec iptables -I "$@"
  fi
}

# Pick the /24 network prefix to use and report what is already in use.
# Globals written:
#   network  - three octet prefix; left untouched when preset with -n,
#              otherwise the first $ip_base.N (N in 1..254) that has no
#              address in the default namespace
#   found    - true when $network is usable (always true with -n)
#   existing - true when the default namespace has any address under
#              $ip_base, i.e. some namespace network is still set up
find_network() {
  local ips i
  found=false
  existing=false
  ips="$(ipd addr show | awk '$1 == "inet" {print $2}')"
  # Scan the whole range, not just up to the first free slot, so that a
  # freed low-numbered network does not hide higher ones still in use.
  if grep -q "^${ip_base//./\\.}\." <<<"$ips"; then
    existing=true
  fi
  if [[ $network ]]; then
    found=true
    return
  fi
  for ((i = 1; i <= 254; i++)); do
    network=$ip_base.$i
    # The trailing \. keeps 10.173.1 from matching 10.173.10.x etc.
    if ! grep -q "^${network//./\\.}\." <<<"$ips"; then
      found=true
      break
    fi
  done
}

# Bring up the mount namespace, the network namespace (when $create),
# the veth pair, forwarding/NAT rules and the private resolvconf copy.
start() {
  find_network
  if ! $found; then
    echo "$0: error: no open network found" >&2
    exit 1
  fi

  #### begin mount namespace setup ####
  mkdir -p "$mount_ns_dir"
  if ! mountpoint "$mount_ns_dir" >/dev/null; then
    mount --bind "$mount_ns_dir" "$mount_ns_dir"
  fi
  # note: This is outside the mount condition because I've mysteriously
  # had this become shared instead of private, perhaps it
  # got remounted somehow and lost the setting.
  mount --make-private "$mount_ns_dir"
  if [[ ! -e $mount_ns_dir/$nn ]]; then
    touch "$mount_ns_dir/$nn"
  fi
  if ! mountpoint "$mount_ns_dir/$nn" >/dev/null; then
    # Here, we specify that we only want mount changes under
    # this mountpoint to be propagated into the bind, but changes
    # from within the bind do not propagate to outside the bind.
    #
    # slave is documented in
    # /usr/share/doc/linux-doc-4.9/Documentation/filesystems/sharedsubtree.txt.gz
    # documentation on propagation is a bit weird because it
    # confusingly talks about binds, namespaces, and mirrors (which
    # seems to be just another name for bind), shared subtrees
    # (which seems to be a term for binds and namespaces), and does not
    # properly specify whether the documentation applies to binds,
    # namespaces, or both. Notably, propagation for binds is marked
    # on the original mount point, and propagation for a mount
    # namespace is marked on mounts within the namespace.
    unshare --propagation slave --mount="$mount_ns_dir/$nn" /bin/true
  fi
  #### end mount namespace setup ####

  if $create; then
    ip netns add "$nn"
    ip -n "$nn" link set dev lo up
  fi

  echo 1 | dexec dd of=/proc/sys/net/ipv4/ip_forward 2>/dev/null

  # docker helpfully changes the default FORWARD to drop...
  diptables-add FORWARD -i "$v0" -j ACCEPT
  diptables-add FORWARD -o "$v0" -j ACCEPT

  # NOTE(review): presumably read by the sourced errhandle script so that
  # stop runs if anything below fails - confirm against errhandle.
  _errcatch_cleanup=stop

  # Create the pair inside the namespace, then push one end out to the
  # default namespace.
  ipnn link add "$v0" type veth peer name "$v1"
  ipnn link set "$v0" netns default
  ipd addr add "$network.1/24" dev "$v0"
  ipd link set "$v0" up
  nat -C &>/dev/null || nat -A

  ipnn addr add "$network.2/24" dev "$v1"
  ipnn link set "$v1" up
  ipnn route add default via "$network.1"

  ###### begin setup resolvconf
  resolv_copy=/root/resolvconf-$nn
  # this condition should never happen, just coding defensively
  if mexec mountpoint /run/resolvconf &>/dev/null; then
    mexec umount /run/resolvconf
  fi
  cp -aT /run/resolvconf "$resolv_copy"
  if ! mexec mount -o bind "$resolv_copy" /run/resolvconf; then
    echo "error: resolv-conf bindmount failed" >&2
    exit 1
  fi

  # if running dnsmasq, we have 127.0.0.1 for dns, but it can't listen on the loopback
  # in the network namespace, so adjust the address.
  if mexec [ -s /run/resolvconf/interface/lo.dnsmasq ]; then
    mexec sed --follow-symlinks -i "s/nameserver 127\..*/nameserver $network.1/" /run/resolvconf/interface/lo.dnsmasq
    mexec resolvconf -u
  fi
  # and in debian based distros at least, it runs with --local-service, and needs a restart
  # to know about the new local network
  if [[ $(systemctl --no-pager show -p ActiveState dnsmasq) == ActiveState=active ]]; then
    systemctl restart dnsmasq
  fi

  # background: if we did this in openvpn's resolv-conf script, we could guard it in
  # if capsh --print|grep '\bcap_sys_admin\b' &>/dev/null
  # and we could get $nn by
  # config_basename=${config%%.*}
  # config_basename=${config_basename##*/}
  # but dnsmasq forces us to do it earlier.
  ###### end setup resolvconf
}

# Undo start(): remove the veth pair, the forwarding rules, the NAT rule
# (only when no other $ip_base network is left), the network namespace
# (when $create) and the mount namespace.
stop() {
  if ipd link list "$v0" &>/dev/null; then
    # this also deletes $v1 and the route we added.
    ipd link del "$v0"
  fi
  find_network
  if ! $existing; then
    if nat -C &>/dev/null; then nat -D; fi
  fi
  # start() inserts both an -i and an -o rule; remove both. Either may
  # already be gone, so failures are tolerated.
  dexec iptables -D FORWARD -i "$v0" -j ACCEPT || :
  dexec iptables -D FORWARD -o "$v0" -j ACCEPT || :
  if $create && [[ -e /var/run/netns/$nn ]]; then
    ip netns del "$nn"
  fi
  # not sure this is necessary since we are tearing down the mount namespace
  if mexec mountpoint /run/resolvconf &>/dev/null; then
    mexec umount /run/resolvconf
  fi
  if mountpoint "$mount_ns_dir/$nn" >/dev/null; then
    umount "$mount_ns_dir/$nn"
  fi
}

case $action in
  start|stop)
    $action
    ;;
  *)
    echo "$0: error: unsupported action" >&2
    exit 1
    ;;
esac