Background:
-This script does not make the namespace be named like ip does, because
-the naming is not necessary, although it could have been done with some
-more work. For debugging and joining the namespace with a bash shell, I
-use nsenter -n -m -t \$(pgrep PROCESS_IN_NAMESPACE) bash. Note: if I
-knew how to easily ask systemd what pid a unit has, i would do that.
+If we aren't creating a named network namespace, to join the namespace
+with a shell, I use:
+nsenter -n -m -t \$(pgrep PROCESS_IN_NAMESPACE) bash
+
+Note: if I knew how to easily ask systemd what pid a unit has, I would
+do that.
+
+If we do create the netns, to join it with a shell, we can do:
+/usr/bin/nsenter --mount=/root/mount_namespaces/NAME --net=/var/run/netns/NAME bash
"ip netns new ..." also does a mount namespace, then bind
mounts each file/dir in /etc/netns/NS_NAME to /etc/NS_NAME. Note,
# background: head -n1 is defensive. Not sure if there is some weird feature
# for 2 routes to be 0/0.
gateway_if=$(ipd route list exact 0/0 | head -n1| sed -r 's/.*dev\s+(\S+).*/\1/')
+
+if [[ ! $gateway_if ]]; then
+ cat >&2 <<EOF
+$0: error: failed to find gateway interface. No output from:
+ipd route list exact 0/0 | head -n1| sed -r 's/.*dev\s+(\S+).*/\1/'
+output from "ipd route list exact 0/0":
+$(ipd route list exact 0/0)
+EOF
+ exit 1
+fi
+
# Run an iptables action on the nat-table POSTROUTING MASQUERADE rule for
# traffic leaving through the default-route interface.
# Globals:   gateway_if (read) - outgoing interface name
# Arguments: $1 - iptables rule action flag (callers here use -C and -D)
# Returns:   exit status of the dexec'd iptables command
nat() {
  # Expansions are quoted so the action flag and interface name are always
  # passed as exactly one argument each.
  dexec iptables -t nat "$1" POSTROUTING -o "$gateway_if" -j MASQUERADE \
    -m comment --comment "systemd network namespace nat"
}
+# d = default
+# Add an iptables rule only if it is not already present: first test the
+# rule with "iptables -C", and if that fails, insert it with "iptables -I"
+# (which places it at the head of the chain). Both commands run via dexec.
+# Arguments: chain name followed by the rule specification.
+diptables-add() {
+ if ! dexec iptables -C "$@" &>/dev/null; then
+ dexec iptables -I "$@"
+ fi
+
+}
+
find_network() {
if [[ $network ]]; then
return
touch /root/mount_namespaces/$nn
fi
if ! mountpoint /root/mount_namespaces/$nn >/dev/null; then
+ # Here, we specify that we only want mount changes under
+ # this mountpoint to be propagated into the bind, but changes
+ # from within the bind do not propagate to outside the bind.
+ #
+ # slave is documented in:
+ # /usr/share/doc/linux-doc-4.9/Documentation/filesystems/sharedsubtree.txt.gz
# Documentation on propagation is a bit weird because it
# confusingly talks about binds, namespaces, mirrors (which
# seem to be just another name for binds), and shared subtrees,
# and does not properly specify whether the documentation applies to binds,
# namespaces, or both. Notably, propagation for binds is marked
# on the original mount point, and propagation for a mount
- # namespace is marked on mounts within the namespace. Here, we
- # specify that we want mount changes propagated to us, but not
- # back.
+ # namespace is marked on mounts within the namespace.
unshare --propagation slave --mount=/root/mount_namespaces/$nn /bin/true
fi
echo 1 | dexec dd of=/proc/sys/net/ipv4/ip_forward 2>/dev/null
# docker helpfully changes the default FORWARD to drop...
- if ! dexec iptables -C FORWARD -i $v0 -j ACCEPT &>/dev/null; then
- dexec iptables -A FORWARD -i $v0 -j ACCEPT
- fi
+ diptables-add FORWARD -i $v0 -j ACCEPT
+ diptables-add FORWARD -o $v0 -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
+
_errcatch_cleanup=stop
ipnn link add $v0 type veth peer name $v1
if nat -C &>/dev/null; then nat -D; fi
fi
dexec iptables -D FORWARD -i $v0 -j ACCEPT ||:
- if $create; then
+ # Delete the named netns only if its handle still exists. Test the
+ # namespace that is actually being deleted ($nn), not a fixed name.
+ if $create && [[ -e /var/run/netns/$nn ]]; then
ip netns del $nn
fi