# https://savannah.nongnu.org/projects/bash-bear-trap/
set -e; . /usr/local/lib/bash-bear; set +e
+m() {
+ local out
+ printf "newns: %s\n" "$*"
+ if ! out=$("$@" 2>&1); then
+ echo "newns: WARNING: last command exit code: $?"
+ elif [[ ! $out ]]; then
+ echo "newns: WARNING: no output from last command"
+ fi
+}
usage() {
cat <<EOF
-usage: ${0##*/} [OPTS] start|stop NS_NAME
+usage: ${0##*/} [OPTS] start|stop|show NS_NAME
Nat a network namespace. systemd friendly
Also creates a mount namespace with a cloned /run/resolvconf.
+Arguments:
+
+start|stop: these do what they say.
+
+show: Show whether the state that start creates and stop removes is
+currently present. This is useful for debugging.
+
+NS_NAME: We use this to name the interfaces we create, the mount
+namespace, and if we are creating a named network namespace, that too.
+
-c, --create Create or destroy a named network namespace. When running from
the same network namespace as pid 1, this is set automatically.
- A systemd created private network is in a network namespace
- different than pid 1.
+ A systemd created private network is in an unnamed network namespace
+ different than pid 1. I haven't found a need for a named network
+ namespace in that case.
-n NETWORK x.x.x /24 private network to use. If not specified, uses
the first unused one starting at 10.173.1
-h, --help Show this help and exit.
From a normal shell:
If we do create the netns, to join it with a shell, we can do (as root)
-/usr/bin/nsenter --mount=/root/mount_namespaces/NAME --net=/var/run/netns/NAME bash
+/usr/bin/nsenter --mount=/run/mount-namespaces/NAME --net=/var/run/netns/NAME bash
If you dont care about the mount namespace, you can leave that option off.
JoinsNamespaceOf= and PrivateNetwork=true.
If resolvconf is installed, we create a named mount namespace under
-/root/mount_namespaces, so we can alter some system config for this
+/run/mount-namespaces, so we can alter some system config for this
namespace. systemd command lines would be prefixed with:
-/usr/bin/nsenter --mount=/root/mount_namespaces/NS_NAME
+/usr/bin/nsenter --mount=/run/mount-namespaces/NS_NAME
Note, this means that they can't run as unpriveledged users, but once
systemd 233 comes out, it will have a bind mount option from within unit
Note: for debugging, adding set -x is a pretty good option.
+TODO: make "start" be idempotent.
+
Please email me if you have a patches, bugs, feedback, or republish this
somewhere else: Ian Kelling <ian@iankelling.org>.
EOF
#### begin arg parsing ####
create=false
-temp=$(getopt -l help,create hcn: "$@") || usage 1
+temp=$(getopt -l help,create hcdn: "$@") || usage 1
eval set -- "$temp"
while true; do
case $1 in
v1=veth1-$nn
ip_base=10.173
-if ! $create && [[ $(readlink /proc/self/ns/net) == "$(readlink /proc/1/ns/net)" ]]; then
- create=true
-fi
-
-# make the default network namespace be named
+### begin make the default network namespace be named "default" ###
mkdir -p /run/netns
target=/run/netns/default
if [[ ! -e $target && ! -L $target ]]; then
# -f to avoid a race condition with running twice
ln -sf /proc/1/ns/net $target
fi
+### end make the default network namespace be named "default" ###
-ipd() { ip -n default "$@"; }
-
-
+if ! $create && [[ $(readlink /proc/self/ns/net) == "$(readlink /proc/1/ns/net)" ]]; then
+ create=true
+fi
# otherwise we are already in the network namespace and it's unnamed.
if $create; then
ipnnargs="-n $nn"
fi
+
+
+ipd() { ip -n default "$@"; }
+
# run ip in the network namespace
ipnn() { ip $ipnnargs "$@"; }
# default network namespace exec
dexec() { ip netns exec default "$@"; }
# mount namespace exec
-mexec() { /usr/bin/nsenter --mount=/root/mount_namespaces/$nn "$@"; }
+mexec() { /usr/bin/nsenter --mount="/run/mount-namespaces/$nn" "$@"; }
nat() {
- # note, in a previous commit i specified the output interface with -o,
+ # Note: duplicated in show()
+ # Note, in a previous commit I specified the output interface with -o,
# but that broke things when my gateway interface changed, and I can't
# see any advantage to it, so I removed it.
dexec iptables -t nat $1 POSTROUTING -s $network.0/24 -j MASQUERADE \
}
-find_network() {
+find-network() {
if [[ $network ]]; then
return
fi
found=false
- existing=false
ips="$(ipd addr show | awk '$1 == "inet" {print $2}')"
for ((i=1; i <= 254; i++)); do
network=$ip_base.$i
- if printf "%s\n" "$ips" | grep "^${network//./\\.}" >/dev/null; then
- existing=true
- else
+ if ! printf "%s\n" "$ips" | grep "^${network//./\\.}" >/dev/null; then
found=true
break
fi
done
-}
-
-start() {
- find_network
if ! $found; then
echo "$0: error: no open network found"
exit 1
fi
+}
+
+start() {
+ find-network
#### begin mount namespace setup ####
- mkdir -p /root/mount_namespaces
- if ! mountpoint /root/mount_namespaces >/dev/null; then
- mount --bind /root/mount_namespaces /root/mount_namespaces
+ mkdir -p /run/mount-namespaces
+ if ! mountpoint /run/mount-namespaces >/dev/null; then
+ mount --bind /run/mount-namespaces /run/mount-namespaces
fi
# note: This is outside the mount condition because I've mysteriously
# had this become shared instead of private, perhaps it
# got remounted somehow and lost the setting.
- mount --make-private /root/mount_namespaces
- if [[ ! -e /root/mount_namespaces/$nn ]]; then
- touch /root/mount_namespaces/$nn
+ mount --make-private /run/mount-namespaces
+ if [[ ! -e /run/mount-namespaces/$nn ]]; then
+ touch /run/mount-namespaces/$nn
fi
- if ! mountpoint /root/mount_namespaces/$nn >/dev/null; then
+ if ! mountpoint /run/mount-namespaces/$nn >/dev/null; then
# Here, we specify that we only want mount changes changes under
# this mountpoint to be propagated into the bind, but changes
# from within the bind do not propagate to outside the bind.
# documentation on propagation is a bit weird because it
# confusingly talks about binds, namespaces, and mirrors (which
# seems to be just another name for bind), shared subtrees
- # (which seems to a term for binds and namespaces), and does not
+ # (which seems to be a term for binds and namespaces), and does not
# properly specify whether the documentation applies to binds,
# namespaces, or both. Notably, propagation for binds is marked
# on the original mount point, and propagation for a mount
# namespace is marked on mounts within the namespace.
- unshare --propagation slave --mount=/root/mount_namespaces/$nn /bin/true
+ unshare --propagation slave --mount=/run/mount-namespaces/$nn /bin/true
fi
#### end mount namespace setup ####
nat -C &>/dev/null || nat -A
ipnn addr add $network.2/24 dev $v1
ipnn link set $v1 up
- ipnn route add default via $network.1
+ cmd="ipnn route add default via $network.1"
+ $cmd
+ fails=0
+ max_fails=2
+ # I've had adding the default route mysteriously fail on boot, so
+ # here we check that it succeeded, do a sleep and a retry.
+ while true; do
+ # g flag: trim both leading and trailing whitespace, not just
+ # whichever comes first.
+ default_route=$(ipnn route show default | sed -r 's,^[[:space:]]+|[[:space:]]+$,,g')
+ if [[ $default_route != "default via $network.1 dev $v1" ]]; then
+ fails=$((fails + 1))
+ else
+ break
+ fi
+ if (( fails >= max_fails )); then
+ echo "$0: ERROR: default route added but not found, retried $max_fails. expected route: 'default via $network.1 dev $v1', found: '$default_route'"
+ # Note: for debugging, if you have a systemd unit which tears down
+ # the newns upon failure, you may want to uncomment the break so
+ # that we proceed and can inspect the system.
+ #break
+ exit 1
+ else
+ sleep 1
+ $cmd
+ fi
+ done
+ if (( fails >= 1 )); then
+ # Report the number of retries that actually happened, not the limit.
+ echo "$0: WARNING: route added but not found until retried $fails times: $cmd"
+ fi
+
###### begin setup resolvconf
if [[ -e /run/resolvconf ]]; then # resolvconf probably installed
fi # end if [[ -e /run/resolvconf ]]
###### end setup resolvconf
-
-
}
stop() {
+ if [[ ! $network ]]; then
+ network=$(ipd -f inet a show dev $v0 2>/dev/null | awk '/inet / {print $2}' | sed -r 's,\.[0-9]+/.*,,' ||:)
+ fi
if ipd link list $v0 &>/dev/null; then
# this also deletes $v1 and the route we added.
ipd link del $v0
fi
- find_network
- if ! $existing; then
- if nat -C &>/dev/null; then nat -D; fi
- fi
+ if [[ $network ]] && nat -C &>/dev/null; then nat -D; fi
dexec iptables -D FORWARD -i $v0 -j ACCEPT &>/dev/null ||:
if $create && [[ -e /var/run/netns/$nn ]]; then
ip netns del $nn
mexec umount /run/resolvconf
fi
- if mountpoint /root/mount_namespaces/$nn >/dev/null; then
- umount /root/mount_namespaces/$nn
+ if mountpoint /run/mount-namespaces/$nn >/dev/null; then
+ umount /run/mount-namespaces/$nn
fi
}
+show() {
+ m ipd link list $v0
+ m dexec iptables -t nat -C POSTROUTING -s $network.0/24 -j MASQUERADE \
+ -m comment --comment "systemd network namespace nat" ||:
+ m dexec iptables -C FORWARD -i $v0 -j ACCEPT
+ m mexec mountpoint /run/resolvconf
+ m mountpoint /run/mount-namespaces/$nn
+}
+
case $action in
- start|stop)
+ start|stop|show)
$action
;;
*)