[[ $EUID == 0 ]] || exec sudo -E "$BASH_SOURCE" "$@"
+# Absolute, symlink-resolved directory that contains this script.
+tmp="$(readlink -f "${BASH_SOURCE}")"; script_dir="${tmp%/*}"
if [[ ! $ERRHANDLE_PATH ]]; then
- ERRHANDLE_PATH=$(readlink -f "${BASH_SOURCE}")
- ERRHANDLE_PATH=$(readlink -f ${ERRHANDLE_PATH%/*}/../errhandle)
+  ERRHANDLE_PATH="$script_dir"/../errhandle/err
fi
-err_sourced=true
-for p in $ERRHANDLE_PATH/{errcatch-function,bash-trace-function}; do
- if [[ -e $p ]]; then
- source $p
- else
- err_sourced=false
- fi
-done
-if $err_sourced; then
- errcatch
+if [[ -s $ERRHANDLE_PATH ]]; then
+  source "$ERRHANDLE_PATH"
+elif [[ -s $script_dir/err ]]; then
+  # A copy fetched by an earlier run: reuse it instead of downloading again.
+  source "$script_dir/err"
else
- set -eE -o pipefail
- trap 'echo "$0:$LINENO:error: \"$BASH_COMMAND\" returned $?" >&2' ERR
+  # NOTE(review): this fetches code over the network and sources it while
+  # running as root (see the sudo re-exec above). Install errhandle manually
+  # or set ERRHANDLE_PATH if that is not acceptable.
+  #
+  # Download to a temporary name so a failed or partial transfer never ends
+  # up as "$script_dir/err", and use explicit paths so that neither the
+  # working directory nor a PATH lookup by "source" is involved.
+  err_tmp=$(mktemp "$script_dir/err.XXXXXX") || exit 1
+  if ! wget -O "$err_tmp" 'https://iankelling.org/git/?p=errhandle;a=blob_plain;f=err;hb=HEAD'; then
+    rm -f "$err_tmp"
+    echo "$0: failed to get errhandle dependency" >&2
+    exit 1
+  fi
+  mv "$err_tmp" "$script_dir/err"
+  source "$script_dir/err"
fi
usage() {
- cat <<EOF
+ cat <<EOF
usage: ${0##*/} [OPTS] start|stop NS_NAME
-Setup & optionally create, a network namespace with nat and a mount namespace
+Nat a network namespace. systemd friendly
+
+Also creates a mount namespace with a cloned /run/resolvconf.
-c, --create Create a named network namespace. When running from
the same network namespace as pid 1, this is set automatically.
- This is the case when running outside a systemd created
- private network.
+ A systemd created private network is in a network namespace
+ different than pid 1.
+-n NETWORK x.x.x /24 private network to use. If not specified, uses
+ the first unused one starting at 10.173.1
-h, --help Show this help and exit.
-From within a systemd network namespace, nat it to the outside. This
+From a normal shell:
+
+If we do create the netns, to join it with a shell, we can do (as root)
+/usr/bin/nsenter --mount=/root/mount_namespaces/NAME --net=/var/run/netns/NAME bash
+
+If you don't care about the mount namespace, you can leave that option off.
+
+
+For systemd:
+
+From within a systemd network namespace, we nat it to the outside. This
would be called from ExecStartPre, and or subsequent units called with
JoinsNamespaceOf= and PrivateNetwork=true.
-Uses /24 network, finding the first locally unused one starting at
-10.173.0.
-
-Also create a named mount namespace under /root/mount_namespaces, so we
-can alter some system config for this namespace. Subsequent systemd
+We also create a named mount namespace under /root/mount_namespaces, so we
+can alter some system config for this namespace. systemd
command lines would be prefixed with:
/usr/bin/nsenter --mount=/root/mount_namespaces/NS_NAME
will update the script to that the mount namespace not created unless a
flag is passed in. Patch welcome to add that flag before then.
-A recommmended dependency of this script is my other repo named "errhandle",
-which prints stack trace on error, and calls a cleanup function:
-https://iankelling.org/git/?p=errhandle, set ERRHANDLE_PATH, or put it
-in a directory adjacent to the absolute, resolved directory this file is
-in.
+This script has a dependency which you can download manually or it
+will be automatically downloaded into the same directory.
+It handles errors by printing a stack trace and cleaning up the namespaces.
+To download manually,
+git clone https://iankelling.org/git/errhandle
+into an adjacent directory, or
+export ERRHANDLE_PATH to point to the 'err' file in that repo.
+
+
+Background on this project (you can skip if you like):
-Background:
+If we aren't creating a named network namespace, to join the namespace
+with a shell, I use:
+nsenter -n -m -t \$(pgrep PROCESS_IN_NAMESPACE) bash
-This script does not make the namespace be named like ip does, because
-the naming is not necessary, although it could have been done with some
-more work. For debugging and joining the namespace with a bash shell, I
-use nsenter -n -m -t $(pgrep PROCESS_IN_NAMESPACE) bash. Note: if I
-knew how to easily ask systemd what pid a unit has, i would do that.
+Note: if I knew how to easily ask systemd what pid a unit has, i would
+do that.
"ip netns new ..." also does a mount namespace, then bind
mounts each file/dir in /etc/netns/NS_NAME to /etc/NS_NAME. Note,
copy /run/resolvconf somehwere then bind mount it on top of
/run/resolvconf.
+
+Note: for debugging, adding set -x is a pretty good option.
+
Please email me if you have a patches, bugs, feedback, or republish this
somewhere else: Ian Kelling <ian@iankelling.org>.
EOF
- exit ${1:-0}
+ exit ${1:-0}
}
#### begin arg parsing ####
create=false
+# Start from a known-empty value: the script re-execs itself through
+# "sudo -E", so a "network" variable exported by the caller would otherwise
+# be inherited and treated as if -n had been given.
+network=
-temp=$(getopt -l help,create hc "$@") || usage 1
+temp=$(getopt -l help,create hcn: "$@") || usage 1
eval set -- "$temp"
while true; do
- case $1 in
- -c|--create) create=true; shift ;;
- -h|--help) usage ;;
- --) shift; break ;;
- *) echo "$0: Internal error!" ; exit 1 ;;
- esac
+  case $1 in
+    -c|--create) create=true; shift ;;
+    -n) network=$2; shift 2 ;;
+    -h|--help) usage ;;
+    --) shift; break ;;
+    *) echo "$0: Internal error!" >&2; exit 1 ;;
+  esac
done
if (( $# != 2 )); then
- usage 1
+  usage 1
fi
action=$1
#### begin sanity checking ####
install_error=false
if ! type -p ip &>/dev/null; then
- echo "please install the iproute2 package"
- install_error=true
+ echo "please install the iproute2 package"
+ install_error=true
fi
if ! type -p iptables &>/dev/null; then
- echo "please install the iptables package"
- install_error=true
+ echo "please install the iptables package"
+ install_error=true
fi
if $install_error; then
- exit 1
+ exit 1
fi
#### end sanity checking ####
ip_base=10.173
if ! $create && [[ $(readlink /proc/self/ns/net) == "$(readlink /proc/1/ns/net)" ]]; then
- create=true
+ create=true
fi
# make the default network namespace be named
target=/run/netns/default
if [[ ! -e $target && ! -L $target ]]; then
- mkdir -p /run/netns
- ln -s /proc/1/ns/net $target
+ mkdir -p /run/netns
+ ln -s /proc/1/ns/net $target
fi
ipd() { ip -n default "$@"; }
if $create; then
- ipnn() { ip -n $nn "$@"; }
+ # run ip in the network namespace
+ ipnn() { ip -n $nn "$@"; }
else
- # we are already in the network namespace and it's unnamed.
- ipnn() { ip "$@"; }
+ # we are already in the network namespace and it's unnamed.
+ # run ip in the network namespace
+ ipnn() { ip "$@"; }
fi
+# default network namespace exec
dexec() { ip netns exec default "$@"; }
+# mount namespace exec
+mexec() { /usr/bin/nsenter --mount=/root/mount_namespaces/$nn "$@"; }
# background: head -n1 is defensive. Not sure if there is some weird feature
# for 2 routes to be 0/0.
-gateway_if=$(ipd route list exact 0/0 | head -n1| sed -r 's/.*\s(\S+)\s*$/\1/')
-nat() { dexec iptables -t nat $1 POSTROUTING -o $gateway_if -j MASQUERADE \
- -m comment --comment "systemd network namespace nat"; }
+gateway_ifs=($(ipd route list exact 0/0 | head -n1| sed -r 's/.*dev\s+(\S+).*/\1/'))
+
+if [[ ! $gateway_ifs ]]; then
+ cat >&2 <<EOF
+$0: error: failed to find gateway interface. No output from:
+ipd route list exact 0/0 | head -n1| sed -r 's/.*dev\s+(\S+).*/\1/'
+output from "ipd route list exact 0/0":
+$(ipd route list exact 0/0)
+EOF
+ exit 1
+fi
+
+# Apply one iptables rule command to the MASQUERADE nat rule in the default
+# network namespace, once per detected gateway interface.
+# Arguments: $1 - the iptables command flag; callers in this script pass
+#                 -C (check), -A (append) or -D (delete).
+# Returns: the status of the last iptables invocation.
+nat() {
+  local gw_if
+  for gw_if in "${gateway_ifs[@]}"; do
+    dexec iptables -t nat "$1" POSTROUTING -o "$gw_if" -j MASQUERADE \
+      -m comment --comment "systemd network namespace nat"
+  done
+}
+
+# Insert an iptables rule in the default network namespace ("d" = default)
+# unless an identical rule is already present there.
+# Arguments: the chain name followed by the rule specification, exactly as
+#            they would be given to "iptables -I".
+diptables-add() {
+  if dexec iptables -C "$@" &>/dev/null; then
+    # rule already exists, nothing to do
+    return 0
+  fi
+  dexec iptables -I "$@"
+}
find_network() {
- found=false
- existing=false
- ips="$(ipd addr show | awk '$1 == "inet" {print $2}')"
- for ((i=0; i <= 254; i++)); do
- network=$ip_base.$i
- if printf "%s\n" "$ips" | grep "^${network//./\\.}" >/dev/null; then
- existing=true
- else
- found=true
- break
- fi
- done
+  # Pick the /24 prefix to use and report what is already in use.
+  # Globals written:
+  #   network  - x.x.x prefix; left alone when already set (e.g. by -n),
+  #              otherwise the first $ip_base.N (N from 1) not in use.
+  #   found    - true when a usable prefix is available.
+  #   existing - true when at least one scanned $ip_base.N prefix is already
+  #              configured in the default network namespace.
+  # The scan always runs so that found/existing are defined even when the
+  # prefix was supplied by the caller; start() and stop() both test them.
+  local ips i candidate
+  found=false
+  existing=false
+  ips="$(ipd addr show | awk '$1 == "inet" {print $2}')"
+  for ((i=1; i <= 254; i++)); do
+    candidate=$ip_base.$i
+    # The trailing "\." stops e.g. 10.173.1 from also matching 10.173.10.x.
+    if printf "%s\n" "$ips" | grep "^${candidate//./\\.}\." >/dev/null; then
+      existing=true
+    else
+      found=true
+      break
+    fi
+  done
+  if [[ $network ]]; then
+    # The prefix was chosen explicitly; keep it.
+    found=true
+  else
+    network=$candidate
+  fi
}
start() {
- find_network
- if ! $found; then
- echo "$0: error: no open network found"
- exit 1
+ find_network
+ if ! $found; then
+ echo "$0: error: no open network found"
+ exit 1
+ fi
+
+ #### begin mount namespace setup ####
+ mkdir -p /root/mount_namespaces
+ if ! mountpoint /root/mount_namespaces >/dev/null; then
+ mount --bind /root/mount_namespaces /root/mount_namespaces
+ fi
+ # note: This is outside the mount condition because I've mysteriously
+ # had this become shared instead of private, perhaps it
+ # got remounted somehow and lost the setting.
+ mount --make-private /root/mount_namespaces
+ if [[ ! -e /root/mount_namespaces/$nn ]]; then
+ touch /root/mount_namespaces/$nn
+ fi
+ if ! mountpoint /root/mount_namespaces/$nn >/dev/null; then
+ # Here, we specify that we only want mount changes under
+ # this mountpoint to be propagated into the bind, but changes
+ # from within the bind do not propagate to outside the bind.
+ #
+ # slave is documented in.
+ # /usr/share/doc/linux-doc-4.9/Documentation/filesystems/sharedsubtree.txt.gz
+ # documentation on propagation is a bit weird because it
+ # confusingly talks about binds, namespaces, and mirrors (which
+ # seems to be just another name for bind), shared subtrees
+ # (which seems to be a term for binds and namespaces), and does not
+ # properly specify whether the documentation applies to binds,
+ # namespaces, or both. Notably, propagation for binds is marked
+ # on the original mount point, and propagation for a mount
+ # namespace is marked on mounts within the namespace.
+ unshare --propagation slave --mount=/root/mount_namespaces/$nn /bin/true
+ fi
+
+ #### end mount namespace setup ####
+
+
+ if $create; then
+ ip netns add $nn
+ ip -n $nn link set dev lo up
+ fi
+
+ echo 1 | dexec dd of=/proc/sys/net/ipv4/ip_forward 2>/dev/null
+
+ # docker helpfully changes the default FORWARD to drop...
+ diptables-add FORWARD -i $v0 -j ACCEPT
+ diptables-add FORWARD -o $v0 -j ACCEPT
+
+
+ _errcatch_cleanup=stop
+ ipnn link add $v0 type veth peer name $v1
+ ipnn link set $v0 netns default
+ ipd addr add $network.1/24 dev $v0
+ ipd link set $v0 up
+ nat -C &>/dev/null || nat -A
+ ipnn addr add $network.2/24 dev $v1
+ ipnn link set $v1 up
+ ipnn route add default via $network.1
+
+ ###### begin setup resolvconf
+ if [[ -e /run/resolvconf ]]; then # resolvconf probably not installed
+ resolv_copy=/root/resolvconf-$nn
+
+ # this condition should never happen, just coding defensively
+ if mexec mountpoint /run/resolvconf &>/dev/null; then
+ mexec umount /run/resolvconf
fi
-
- #### begin mount namespace setup ####
- mkdir -p /root/mount_namespaces
- if ! mountpoint /root/mount_namespaces >/dev/null; then
- mount --bind /root/mount_namespaces /root/mount_namespaces
- mount --make-private /root/mount_namespaces
+ cp -aT /run/resolvconf $resolv_copy
+ if ! mexec mount -o bind $resolv_copy /run/resolvconf; then
+ echo "error: resolv-conf bindmount failed"
+ exit 1
fi
- if [[ ! -e /root/mount_namespaces/$nn ]]; then
- touch /root/mount_namespaces/$nn
+ # if running dnsmasq, we have 127.0.0.1 for dns, but it can't listen on the loopback
+ # in the network namespace, so adjust the address.
+ if mexec [ -s /run/resolvconf/interface/lo.dnsmasq ]; then
+ mexec sed --follow-symlinks -i "s/nameserver 127\..*/nameserver $network.1/" /run/resolvconf/interface/lo.dnsmasq
+ mexec resolvconf -u
fi
- if ! mountpoint /root/mount_namespaces/$nn >/dev/null; then
- unshare --mount=/root/mount_namespaces/$nn
+ # and in debian based distros at least, it runs with --local-service, and needs a restart
+ # to know about the new local network
+ if [[ $(systemctl --no-pager show -p ActiveState dnsmasq ) == ActiveState=active ]]; then
+ systemctl restart dnsmasq
fi
- #### end mount namespace setup ####
-
- if $create; then
- ip netns add $nn
- ip -n $nn link set dev lo up
- fi
+ # background: if we did this in openvpn's resolv-conf script, we could guard it in
+ # if capsh --print|grep '\bcap_sys_admin\b' &>/dev/null
+ # and we could get $nn by
+ # config_basename=${config%%.*}
+ # config_basename=${config_basename##*/}
+ # but dnsmasq forces us to do it earlier.
- echo 1 | dexec dd of=/proc/sys/net/ipv4/ip_forward 2>/dev/null
+ fi # end if [[ -e /run/resolvconf ]]
+ ###### end setup resolvconf
- _errcatch_cleanup=stop
- ipnn link add $v0 type veth peer name $v1
- ipnn link set $v0 netns default
- ipd addr add $network.1/24 dev $v0
- ipd link set $v0 up
- nat -C &>/dev/null || nat -A
- ipnn addr add $network.2/24 dev $v1
- ipnn link set $v1 up
- ipnn route add default via $network.1
}
stop() {
- if ipd link list $v0 &>/dev/null; then
- # this also deletes $v1 and the route we added.
- ipd link del $v0
- fi
- find_network
- if ! $existing; then
- if nat -C &>/dev/null; then nat -D; fi
- fi
- if $create; then
- ip netns del $nn
- fi
- if mountpoint /root/mount_namespaces/$nn >/dev/null; then
- umount /root/mount_namespaces/$nn
- fi
+  # Tear down what start() set up. Also used as the error cleanup hook
+  # (start() assigns _errcatch_cleanup=stop), so every step tolerates the
+  # thing it removes being absent.
+  if ipd link list "$v0" &>/dev/null; then
+    # this also deletes $v1 and the route we added.
+    ipd link del "$v0"
+  fi
+  find_network
+  if ! $existing; then
+    if nat -C &>/dev/null; then nat -D; fi
+  fi
+  # start() inserts two FORWARD rules (-i and -o); remove both so repeated
+  # start/stop cycles do not leave a stale rule behind. Failures are ignored
+  # on purpose: the rules may never have been added.
+  dexec iptables -D FORWARD -i "$v0" -j ACCEPT &>/dev/null ||:
+  dexec iptables -D FORWARD -o "$v0" -j ACCEPT &>/dev/null ||:
+  if $create && [[ -e /var/run/netns/$nn ]]; then
+    ip netns del "$nn"
+  fi
+
+  # not sure this is necessary since we are tearing down the mount namespace
+  if mexec mountpoint /run/resolvconf &>/dev/null; then
+    mexec umount /run/resolvconf
+  fi
+
+  if mountpoint "/root/mount_namespaces/$nn" >/dev/null; then
+    umount "/root/mount_namespaces/$nn"
+  fi
}
case $action in
- start|stop)
- $action
- ;;
- *)
- echo "$0: error: unsupported action"
- exit 1
- ;;
+ start|stop)
+ $action
+ ;;
+ *)
+ echo "$0: error: unsupported action"
+ exit 1
+ ;;
esac