2 # Copyright (C) 2017 Ian Kelling
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
8 # http://www.apache.org/licenses/LICENSE-2.0
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
# Re-run this script through sudo, keeping the caller's environment (-E) and
# the original arguments, whenever the effective uid is not 0.
if (( EUID != 0 )); then
  exec sudo -E "${BASH_SOURCE[0]}" "$@"
fi
19 if [[ ! $ERRHANDLE_PATH ]]; then
20 ERRHANDLE_PATH
=$
(readlink
-f "${BASH_SOURCE}")
21 ERRHANDLE_PATH
=$
(readlink
-f ${ERRHANDLE_PATH%/*}/..
/errhandle
)
24 for p
in $ERRHANDLE_PATH/{errcatch-function
,bash-trace-function
}; do
# When the ERR trap fires, print the script name, the line number, the
# command that failed and its exit status on stderr.
trap 'printf "%s\n" "$0:$LINENO:error: \"$BASH_COMMAND\" returned $?" >&2' ERR
40 usage: ${0##*/} [OPTS] start|stop NS_NAME
41 Nat a network namespace. systemd friendly
43 Also creates a mount namespace with a cloned /run/resolvconf.
45 -c, --create Create a named network namespace. When running from
46 the same network namespace as pid 1, this is set automatically.
47 A systemd created private network is in a network namespace
49 -n NETWORK x.x.x /24 private network to use. If not specified, uses
50 the first one starting at 10.173.1
51 -h, --help Show this help and exit.
53 From within a systemd network namespace, nat it to the outside. This
54 would be called from ExecStartPre, and or subsequent units called with
55 JoinsNamespaceOf= and PrivateNetwork=true.
57 Also create a named mount namespace under /root/mount_namespaces, so we
58 can alter some system config for this namespace. Subsequent systemd
59 command lines would be prefixed with:
61 /usr/bin/nsenter --mount=/root/mount_namespaces/NS_NAME
Note, this means that they can't run as unprivileged users, but once
64 systemd 233 comes out, it will have a bind mount option from within unit
65 files, so the mount namespace won't be needed for most use cases, and I
will update the script so that the mount namespace is not created unless a
67 flag is passed in. Patch welcome to add that flag before then.
A recommended dependency of this script is my other repo named "errhandle",
70 which prints stack trace on error, and calls a cleanup function:
71 https://iankelling.org/git/?p=errhandle, set ERRHANDLE_PATH, or put it
72 in a directory adjacent to the absolute, resolved directory this file is
77 If we aren't creating a named network namespace, to join the namespace
79 nsenter -n -m -t \$(pgrep PROCESS_IN_NAMESPACE) bash
81 Note: if I knew how to easily ask systemd what pid a unit has, i would
84 If we do create the netns, to join it with a shell, we can do
85 /usr/bin/nsenter --mount=/root/mount_namespaces/NAME --net=/var/run/netns/NAME bash
87 "ip netns new ..." also does a mount namespace, then bind
88 mounts each file/dir in /etc/netns/NS_NAME to /etc/NS_NAME. Note,
for openvpn having its own resolv.conf by using its user script which
90 calls resolvconf, this doesn't help much. What we actually want to do is
copy /run/resolvconf somewhere then bind mount it on top of
94 Note: for debugging, adding set -x is a pretty good option.
Please email me if you have patches, bugs, feedback, or republish this
97 somewhere else: Ian Kelling <ian@iankelling.org>.
103 #### begin arg parsing ####
# Let getopt normalize the command line (short options h, c and n with an
# argument; long options help and create). If getopt rejects the arguments,
# call usage with 1.
if ! temp=$(getopt -l help,create hcn: "$@"); then
  usage 1
fi
109 -c|
--create) create
=true
; shift ;;
110 -n) network
=$2; shift 2 ;;
113 *) echo "$0: Internal error!" ; exit 1 ;;
116 if (( $# != 2 )); then
121 nn
=$2 # namespace name
122 #### end arg parsing ####
124 #### begin sanity checking ####
126 if ! type -p ip
&>/dev
/null
; then
127 echo "please install the iproute2 package"
130 if ! type -p iptables
&>/dev
/null
; then
131 echo "please install the iptables package"
134 if $install_error; then
137 #### end sanity checking ####
144 if ! $create && [[ $
(readlink
/proc
/self
/ns
/net
) == "$(readlink /proc/1/ns/net)" ]]; then
148 # make the default network namespace be named
149 target
=/run
/netns
/default
150 if [[ ! -e $target && ! -L $target ]]; then
152 ln -s /proc
/1/ns
/net
$target
# ipd ARGS...
# Run ip with "-n default", i.e. against the network namespace named
# "default", passing every argument through unchanged.
ipd() {
  local -a ip_cmd=(ip -n default)
  "${ip_cmd[@]}" "$@"
}
158 # run ip in the network namespace
# ipnn ARGS...
# Run ip in the network namespace named by the global $nn, passing every
# argument through unchanged. $nn is quoted so that a name containing
# whitespace or glob characters reaches ip as exactly one argument.
ipnn() {
  ip -n "$nn" "$@"
}
161 # we are already in the network namespace and it's unnamed.
162 # run ip in the network namespace
165 # default network namespace exec
# dexec CMD [ARGS...]
# Execute CMD with its arguments through "ip netns exec default", i.e. inside
# the network namespace named "default".
dexec() {
  local -a netns_exec=(ip netns exec default)
  "${netns_exec[@]}" "$@"
}
167 # mount namespace exec
# mexec CMD [ARGS...]
# Run CMD with its arguments through nsenter, entering the mount namespace
# referenced by the file /root/mount_namespaces/$nn.
# The path is quoted so that an $nn containing whitespace or glob characters
# still yields a single --mount= argument.
mexec() {
  /usr/bin/nsenter --mount="/root/mount_namespaces/$nn" "$@"
}
171 # background: head -n1 is defensive. Not sure if there is some weird feature
172 # for 2 routes to be 0/0.
# Interface that carries the default route: take the first 0/0 route that
# ipd lists and keep only the word following "dev".
gateway_if=$(
  ipd route list exact 0/0 \
    | head -n1 \
    | sed -r 's/.*dev\s+(\S+).*/\1/'
)
175 if [[ ! $gateway_if ]]; then
177 $0: error: failed to find gateway interface. No output from:
178 ipd route list exact 0/0 | head -n1| sed -r 's/.*dev\s+(\S+).*/\1/'
179 output from "ipd route list exact 0/0":
180 $(ipd route list exact 0/0)
# nat ACTION
# Apply ACTION (an iptables rule command flag; the script calls this with
# -C, -A and -D) to the nat-table POSTROUTING rule that masquerades traffic
# leaving through $gateway_if. iptables is run through dexec, and the rule
# is tagged with a fixed comment.
# ACTION and $gateway_if are quoted so that neither is word-split or
# glob-expanded before reaching iptables.
nat() {
  dexec iptables -t nat "$1" POSTROUTING -o "$gateway_if" -j MASQUERADE \
    -m comment --comment "systemd network namespace nat"
}
190 if ! dexec iptables
-C "$@" &>/dev
/null
; then
191 dexec iptables
-I "$@"
197 if [[ $network ]]; then
202 ips
="$(ipd addr show | awk '$1 == "inet
" {print $2}')"
203 for ((i
=1; i
<= 254; i
++)); do
205 if printf "%s\n" "$ips" |
grep "^${network//./\\.}" >/dev
/null
; then
217 echo "$0: error: no open network found"
221 #### begin mount namespace setup ####
222 mkdir
-p /root
/mount_namespaces
223 if ! mountpoint
/root
/mount_namespaces
>/dev
/null
; then
224 mount
--bind /root
/mount_namespaces
/root
/mount_namespaces
226 # note: This is outside the mount condition because I've mysteriously
227 # had this become shared instead of private, perhaps it
228 # got remounted somehow and lost the setting.
229 mount
--make-private /root
/mount_namespaces
230 if [[ ! -e /root
/mount_namespaces
/$nn ]]; then
231 touch /root
/mount_namespaces
/$nn
233 if ! mountpoint
/root
/mount_namespaces
/$nn >/dev
/null
; then
# Here, we specify that we only want mount changes under
235 # this mountpoint to be propagated into the bind, but changes
236 # from within the bind do not propagate to outside the bind.
238 # slave is documented in.
239 # /usr/share/doc/linux-doc-4.9/Documentation/filesystems/sharedsubtree.txt.gz
240 # documentation on propagation is a bit weird because it
241 # confusingly talks about binds, namespaces, and mirrors (which
242 # seems to be just another name for bind), shared subtrees
# (which seems to be a term for binds and namespaces), and does not
244 # properly specify whether the documentation applies to binds,
245 # namespaces, or both. Notably, propagation for binds is marked
246 # on the original mount point, and propagation for a mount
247 # namespace is marked on mounts within the namespace.
248 unshare
--propagation slave
--mount=/root
/mount_namespaces
/$nn /bin
/true
251 #### end mount namespace setup ####
256 ip
-n $nn link
set dev lo up
259 echo 1 | dexec
dd of
=/proc
/sys
/net
/ipv
4/ip_forward
2>/dev
/null
261 # docker helpfully changes the default FORWARD to drop...
262 diptables-add FORWARD
-i $v0 -j ACCEPT
263 diptables-add FORWARD
-o $v0 -m conntrack
--ctstate RELATED
,ESTABLISHED
-j ACCEPT
266 _errcatch_cleanup
=stop
267 ipnn link add
$v0 type veth peer name
$v1
268 ipnn link
set $v0 netns default
269 ipd addr add
$network.1/24 dev
$v0
271 nat
-C &>/dev
/null || nat
-A
272 ipnn addr add
$network.2/24 dev
$v1
274 ipnn route add default via
$network.1
276 ###### begin setup resolvconf
277 resolv_copy
=/root
/resolvconf-
$nn
279 # this condition should never happen, just coding defensively
280 if mexec mountpoint
/run
/resolvconf
&>/dev
/null
; then
281 mexec umount
/run
/resolvconf
283 cp -aT /run
/resolvconf
$resolv_copy
284 if ! mexec mount
-o bind $resolv_copy /run
/resolvconf
; then
285 echo "error: resolv-conf bindmount failed"
288 # if running dnsmasq, we have 127.0.0.1 for dns, but it can't listen on the loopback
289 # in the network namespace, so adjust the address.
290 if mexec
[ -s /run
/resolvconf
/interface
/lo.dnsmasq
]; then
291 mexec
sed --follow-symlinks -i "s/nameserver 127\..*/nameserver $network.1/" /run
/resolvconf
/interface
/lo.dnsmasq
294 # background: if we did this in openvpn's resolv-conf script, we could guard it in
295 # if capsh --print|grep '\bcap_sys_admin\b' &>/dev/null
296 # and we could get $nn by
297 # config_basename=${config%%.*}
298 # config_basename=${config_basename##*/}
299 # but dnsmasq forces us to do it earlier.
300 ###### end setup resolvconf
306 if ipd link list
$v0 &>/dev
/null
; then
307 # this also deletes $v1 and the route we added.
312 if nat
-C &>/dev
/null
; then nat
-D; fi
314 dexec iptables
-D FORWARD
-i $v0 -j ACCEPT ||
:
315 if $create && [[ -e /var
/run
/netns
/client
]]; then
319 # not sure this is necessary since we are tearing down the mount namespace
320 if mexec mountpoint
/run
/resolvconf
&>/dev
/null
; then
321 mexec umount
/run
/resolvconf
324 if mountpoint
/root
/mount_namespaces
/$nn >/dev
/null
; then
325 umount
/root
/mount_namespaces
/$nn
334 echo "$0: error: unsupported action"