usage() {
cat <<EOF
usage: ${0##*/} [OPTS] start|stop NS_NAME
-Setup new or systemd created network namespace with nat and mount namespace
+Nat a network namespace. systemd friendly
+
+Also creates a mount namespace with a cloned /run/resolvconf.
-c, --create Create a named network namespace. When running from
the same network namespace as pid 1, this is set automatically.
- This is the case when running outside a systemd created
- private network.
+ A systemd created private network is in a network namespace
+ different than pid 1.
+-n NETWORK x.x.x /24 private network to use. If not specified, uses
+ the first one starting at 10.173.1
-h, --help Show this help and exit.
From within a systemd network namespace, nat it to the outside. This
would be called from ExecStartPre, and or subsequent units called with
JoinsNamespaceOf= and PrivateNetwork=true.
-If given -c, or if in the default network namespace, create a named
-network namepace natted to the current netns.
-
-Uses /24 network, finding the first locally unused one starting at
-10.173.0.
-
Also create a named mount namespace under /root/mount_namespaces, so we
can alter some system config for this namespace. Subsequent systemd
command lines would be prefixed with:
This script does not make the namespace be named like ip does, because
the naming is not necessary, although it could have been done with some
more work. For debugging and joining the namespace with a bash shell, I
-use nsenter -n -m -t $(pgrep PROCESS_IN_NAMESPACE) bash. Note: if I
+use nsenter -n -m -t \$(pgrep PROCESS_IN_NAMESPACE) bash. Note: if I
knew how to easily ask systemd what pid a unit has, i would do that.
"ip netns new ..." also does a mount namespace, then bind
copy /run/resolvconf somewhere then bind mount it on top of
/run/resolvconf.
+Note: for debugging, adding set -x is a pretty good option.
+
Please email me if you have any patches, bugs, feedback, or republish this
somewhere else: Ian Kelling <ian@iankelling.org>.
EOF
#### begin arg parsing ####
create=false
-temp=$(getopt -l help,create hc "$@") || usage 1
+temp=$(getopt -l help,create hcn: "$@") || usage 1
eval set -- "$temp"
while true; do
case $1 in
-c|--create) create=true; shift ;;
+ -n) network=$2; shift 2 ;;
-h|--help) usage ;;
--) shift; break ;;
*) echo "$0: Internal error!" ; exit 1 ;;
ipd() {
  # Run ip with the given arguments against the network namespace named
  # "default"; the exit status is that of ip.
  local -a ns_selector=(-n default)
  ip "${ns_selector[@]}" "$@"
}
if $create; then
+ # run ip in the network namespace
ipnn() { ip -n $nn "$@"; }
else
# we are already in the network namespace and it's unnamed.
+ # run ip in the network namespace
ipnn() { ip "$@"; }
fi
+# default network namespace exec
dexec() {
  # Execute the given command line via "ip netns exec" in the network
  # namespace named "default"; the exit status is that of ip.
  local -a netns_prefix=(netns exec default)
  ip "${netns_prefix[@]}" "$@"
}
+# mount namespace exec
+mexec() { /usr/bin/nsenter --mount=/root/mount_namespaces/$nn "$@"; }
# background: head -n1 is defensive. Not sure if there is some weird feature
# for 2 routes to be 0/0.
-gateway_if=$(ipd route list exact 0/0 | head -n1| sed -r 's/.*\s(\S+)\s*$/\1/')
+gateway_if=$(ipd route list exact 0/0 | head -n1| sed -r 's/.*dev\s+(\S+).*/\1/')
# Run an iptables operation on the nat-table POSTROUTING MASQUERADE rule for
# traffic leaving through $gateway_if. The command is run through dexec.
# Globals:   gateway_if (read) - outgoing interface name
# Arguments: $1 - iptables rule operation flag (stop() uses -C and -D)
# Returns:   status of the iptables command; 1 if gateway_if is empty
nat() {
  # An empty interface would otherwise shift the iptables arguments (unquoted)
  # or be passed as an empty interface name (quoted); refuse explicitly.
  if [[ -z ${gateway_if:-} ]]; then
    echo "${0##*/}: nat: no gateway interface found" >&2
    return 1
  fi
  dexec iptables -t nat "$1" POSTROUTING -o "$gateway_if" -j MASQUERADE \
    -m comment --comment "systemd network namespace nat"
}
find_network() {
+ if [[ $network ]]; then
+ return
+ fi
found=false
existing=false
ips="$(ipd addr show | awk '$1 == "inet" {print $2}')"
- for ((i=0; i <= 254; i++)); do
+ for ((i=1; i <= 254; i++)); do
network=$ip_base.$i
if printf "%s\n" "$ips" | grep "^${network//./\\.}" >/dev/null; then
existing=true
mkdir -p /root/mount_namespaces
if ! mountpoint /root/mount_namespaces >/dev/null; then
mount --bind /root/mount_namespaces /root/mount_namespaces
- mount --make-private /root/mount_namespaces
fi
+ # note: This is outside the mount condition because I've mysteriously
+ # had this become shared instead of private, perhaps it
+ # got remounted somehow and lost the setting.
+ mount --make-private /root/mount_namespaces
if [[ ! -e /root/mount_namespaces/$nn ]]; then
touch /root/mount_namespaces/$nn
fi
if ! mountpoint /root/mount_namespaces/$nn >/dev/null; then
- unshare --mount=/root/mount_namespaces/$nn
+ # documentation on propagation is a bit weird because it
+ # confusingly talks about binds, namespaces, and mirrors (which
+ # seems to be just another name for bind), shared subtrees
+ # (which seems to be a term for binds and namespaces), and does not
+ # properly specify whether the documentation applies to binds,
+ # namespaces, or both. Notably, propagation for binds is marked
+ # on the original mount point, and propagation for a mount
+ # namespace is marked on mounts within the namespace. Here, we
+ # specify that we want mount changes propagated to us, but not
+ # back.
+ unshare --propagation slave --mount=/root/mount_namespaces/$nn /bin/true
fi
+
#### end mount namespace setup ####
echo 1 | dexec dd of=/proc/sys/net/ipv4/ip_forward 2>/dev/null
+ # docker helpfully changes the default FORWARD to drop...
+ if ! dexec iptables -C FORWARD -i $v0 -j ACCEPT &>/dev/null; then
+ dexec iptables -A FORWARD -i $v0 -j ACCEPT
+ fi
+
_errcatch_cleanup=stop
ipnn link add $v0 type veth peer name $v1
ipnn link set $v0 netns default
ipnn link set $v1 up
ipnn route add default via $network.1
+ ###### begin setup resolvconf
+ resolv_copy=/root/resolvconf-$nn
+
+ # this condition should never happen, just coding defensively
+ if mexec mountpoint /run/resolvconf &>/dev/null; then
+ mexec umount /run/resolvconf
+ fi
+ cp -aT /run/resolvconf $resolv_copy
+ if ! mexec mount -o bind $resolv_copy /run/resolvconf; then
+ echo "error: resolv-conf bindmount failed"
+ exit 1
+ fi
+ # if running dnsmasq, we have 127.0.0.1 for dns, but it can't listen on the loopback
+ # in the network namespace, so adjust the address.
+ if mexec [ -s /run/resolvconf/interface/lo.dnsmasq ]; then
+ mexec sed --follow-symlinks -i "s/nameserver 127\..*/nameserver $network.1/" /run/resolvconf/interface/lo.dnsmasq
+ mexec resolvconf -u
+ fi
+ # background: if we did this in openvpn's resolv-conf script, we could guard it in
+ # if capsh --print|grep '\bcap_sys_admin\b' &>/dev/null
+ # and we could get $nn by
+ # config_basename=${config%%.*}
+ # config_basename=${config_basename##*/}
+ # but dnsmasq forces us to do it earlier.
+ ###### end setup resolvconf
+
+
}
stop() {
if ! $existing; then
if nat -C &>/dev/null; then nat -D; fi
fi
+ dexec iptables -D FORWARD -i $v0 -j ACCEPT ||:
if $create; then
ip netns del $nn
fi
+
+ # not sure this is necessary since we are tearing down the mount namespace
+ if mexec mountpoint /run/resolvconf &>/dev/null; then
+ mexec umount /run/resolvconf
+ fi
+
if mountpoint /root/mount_namespaces/$nn >/dev/null; then
umount /root/mount_namespaces/$nn
fi