change mount ns to slave
[newns] / newns
1 #!/bin/bash
2 # Copyright (C) 2017 Ian Kelling
3
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
7
8 # http://www.apache.org/licenses/LICENSE-2.0
9
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
15
16
# Everything below manipulates namespaces, mounts and iptables, so it has to
# run as root. When started unprivileged, replace this process with a sudo
# invocation of the same script, keeping the caller's environment (-E) and
# the original arguments.
if [[ $EUID != 0 ]]; then
  exec sudo -E "$BASH_SOURCE" "$@"
fi
18
# Optional dependency: the "errhandle" helpers (errcatch-function and
# bash-trace-function). Unless ERRHANDLE_PATH is already set, look for a
# directory named "errhandle" next to the resolved directory that contains
# this script.
if [[ ! $ERRHANDLE_PATH ]]; then
  ERRHANDLE_PATH=$(readlink -f "${BASH_SOURCE[0]}")
  # Quoted so that a script path containing whitespace or glob characters
  # is passed to readlink as a single argument.
  ERRHANDLE_PATH=$(readlink -f "${ERRHANDLE_PATH%/*}/../errhandle")
fi
err_sourced=true
# The directory part is quoted (brace expansion still applies) so that it is
# never word-split; each candidate file is sourced only if it exists.
for p in "$ERRHANDLE_PATH"/{errcatch-function,bash-trace-function}; do
  if [[ -e $p ]]; then
    # shellcheck source=/dev/null
    source "$p"
  else
    err_sourced=false
  fi
done
if $err_sourced; then
  # Both helper files were found: let errhandle install its error handling.
  errcatch
else
  # Fallback when errhandle is unavailable: abort on the first failing
  # command (also inside functions and pipelines) and report where it failed.
  set -eE -o pipefail
  trap 'echo "$0:$LINENO:error: \"$BASH_COMMAND\" returned $?" >&2' ERR
fi
37
#######################################
# Print the help text and exit.
# Arguments: $1 - exit status to use (default 0)
# Outputs:   help text on stdout when the status is 0 (help was requested),
#            on stderr otherwise (usage error), so that error output never
#            pollutes a caller's captured stdout.
# Returns:   never returns; exits with the given status.
#######################################
usage() {
  local status=${1:-0}
  local fd=1
  if [[ $status != 0 ]]; then
    fd=2
  fi
  cat >&"$fd" <<EOF
usage: ${0##*/} [OPTS] start|stop NS_NAME
Nat a network namespace. create a mount ns. systemd friendly

-c, --create  Create a named network namespace. When running from
              the same network namespace as pid 1, this is set automatically.
              This is the case when running outside a systemd created
              private network.
-n NETWORK    x.x.x /24 private network to use. If not specified, uses
              the first one starting at 10.173.1
-h, --help    Show this help and exit.

From within a systemd network namespace, nat it to the outside. This
would be called from ExecStartPre, and/or subsequent units called with
JoinsNamespaceOf= and PrivateNetwork=true.

Also create a named mount namespace under /root/mount_namespaces, so we
can alter some system config for this namespace. Subsequent systemd
command lines would be prefixed with:

/usr/bin/nsenter --mount=/root/mount_namespaces/NS_NAME

Note, this means that they can't run as unprivileged users, but once
systemd 233 comes out, it will have a bind mount option from within unit
files, so the mount namespace won't be needed for most use cases, and I
will update the script so that the mount namespace is not created unless a
flag is passed in. Patch welcome to add that flag before then.

A recommended dependency of this script is my other repo named "errhandle",
which prints stack trace on error, and calls a cleanup function:
https://iankelling.org/git/?p=errhandle, set ERRHANDLE_PATH, or put it
in a directory adjacent to the absolute, resolved directory this file is
in.

Background:

This script does not make the namespace be named like ip does, because
the naming is not necessary, although it could have been done with some
more work. For debugging and joining the namespace with a bash shell, I
use nsenter -n -m -t \$(pgrep PROCESS_IN_NAMESPACE) bash. Note: if I
knew how to easily ask systemd what pid a unit has, i would do that.

"ip netns new ..." also does a mount namespace, then bind
mounts each file/dir in /etc/netns/NS_NAME to /etc/NS_NAME. Note,
for openvpn having its own resolv.conf by using its user script which
calls resolvconf, this doesn't help much. What we actually want to do is
copy /run/resolvconf somewhere then bind mount it on top of
/run/resolvconf.

Please email me if you have patches, bugs, feedback, or republish this
somewhere else: Ian Kelling <ian@iankelling.org>.
EOF
  exit "$status"
}
93
94
#### begin arg parsing ####
create=false
# Start from a known-empty value: this script re-executes itself through
# "sudo -E", so a "network" variable exported by the caller would otherwise
# be treated exactly as if -n had been given.
network=
# getopt normalizes the command line (and prints its own diagnostics on
# error); its output is shell-quoted specifically so it can be re-read with
# "eval set --".
temp=$(getopt -o hcn: -l help,create -- "$@") || usage 1
eval set -- "$temp"
while true; do
  case $1 in
    -c|--create) create=true; shift ;;
    -n) network=$2; shift 2 ;;
    -h|--help) usage ;;
    --) shift; break ;;
    *) echo "$0: Internal error!" >&2; exit 1 ;;
  esac
done
# Exactly two positional arguments are required: the action and the name.
if (( $# != 2 )); then
  usage 1
fi

action=$1
nn=$2 # namespace name
#### end arg parsing ####
115
#### begin sanity checking ####
# Verify that the external tools this script drives are installed. Every
# missing tool is reported (on stderr, as these are diagnostics) before
# exiting, so the user can install everything in one go.
install_error=false
for tool_and_pkg in ip:iproute2 iptables:iptables; do
  # Each entry is COMMAND:PACKAGE.
  if ! type -p "${tool_and_pkg%%:*}" &>/dev/null; then
    echo "please install the ${tool_and_pkg#*:} package" >&2
    install_error=true
  fi
done
unset tool_and_pkg
if $install_error; then
  exit 1
fi
#### end sanity checking ####
130
131
# First two octets of the /24 networks that are picked automatically.
ip_base=10.173
# Names of the two ends of the veth pair, derived from the namespace name.
v1=veth1-$nn
v0=veth0-$nn

# Sharing pid 1's network namespace means nobody prepared a private
# namespace for us, so act as if --create had been given.
if ! $create; then
  if [[ $(readlink /proc/self/ns/net) == "$(readlink /proc/1/ns/net)" ]]; then
    create=true
  fi
fi

# make the default network namespace be named
target=/run/netns/default
# Skip when anything is already there, including a dangling symlink.
if ! [[ -e $target || -L $target ]]; then
  mkdir -p /run/netns
  ln -s /proc/1/ns/net "$target"
fi
146
147
# ipd ARGS...: run "ip" against the network namespace named "default".
ipd() { ip -n default "$@"; }
# ipnn ARGS...: run "ip" against the namespace being natted. Which form is
# defined is decided once, here, from $create.
if $create; then
  # The namespace name is quoted so it always reaches ip as one argument.
  ipnn() { ip -n "$nn" "$@"; }
else
  # we are already in the network namespace and it's unnamed.
  ipnn() { ip "$@"; }
fi
# dexec CMD ARGS...: run a command inside the "default" network namespace.
dexec() { ip netns exec default "$@"; }
156
157
# _default_route_dev: read "ip route" output on stdin and print the device of
# the first route line that names one, i.e. the word following "dev".
# Taking the last word of the line instead is wrong whenever the line
# continues after the device, e.g.
# "default via 192.168.1.1 dev eth0 proto dhcp metric 100".
# The whole input is consumed so the producer never sees a closed pipe.
_default_route_dev() {
  awk '
    !seen {
      for (i = 1; i < NF; i++) {
        if ($i == "dev") {
          print $(i + 1)
          seen = 1
          break
        }
      }
    }
  '
}

# background: using only the first match is defensive. Not sure if there is
# some weird feature for 2 routes to be 0/0.
gateway_if=$(ipd route list exact 0/0 | _default_route_dev)

# nat OP: apply iptables operation OP (-A to add, -C to check, -D to delete)
# to the MASQUERADE rule for traffic leaving through the default gateway
# interface, in the default network namespace.
# Returns non-zero, without calling iptables, when no gateway interface is
# known, instead of handing iptables a mangled command line.
nat() {
  if [[ -z $gateway_if ]]; then
    echo "$0: error: no default route found in the default network namespace" >&2
    return 1
  fi
  dexec iptables -t nat "$1" POSTROUTING -o "$gateway_if" -j MASQUERADE \
    -m comment --comment "systemd network namespace nat"
}
163
#######################################
# Choose the /24 network (first three octets) to use.
# Globals:
#   network  (read/written) kept as is when already set, e.g. by -n;
#            otherwise set to the first free $ip_base.N with N in 1..254
#   found    (written) true when $network is usable, false when every
#            candidate is taken
#   existing (written) true when the default namespace still has at least
#            one address inside $ip_base.*, i.e. the shared NAT rule may
#            still be needed
#   ip_base  (read)
# Returns:   0
#######################################
find_network() {
  if [[ $network ]]; then
    # A network is already decided (by -n or by an earlier call). Nothing is
    # scanned, so keep an earlier scan's verdict for "existing" if there is
    # one, and otherwise assume the NAT rule may still be in use.
    found=true
    existing=${existing:-true}
    return
  fi
  found=false
  existing=false
  local addrs candidate i
  addrs=$(ipd addr show | awk '$1 == "inet" {print $2}')
  # Put a newline in front of every address so that "<newline>NET." matches
  # only at the start of an address; the trailing dot makes sure 10.173.1
  # does not match 10.173.10.x or 10.173.1xx.y.
  addrs=$'\n'$addrs
  if [[ $addrs == *$'\n'"$ip_base."* ]]; then
    existing=true
  fi
  for ((i = 1; i <= 254; i++)); do
    candidate=$ip_base.$i
    if [[ $addrs != *$'\n'"$candidate."* ]]; then
      network=$candidate
      found=true
      break
    fi
  done
}
181
#######################################
# Set everything up: pick a network, create the persistent mount namespace
# /root/mount_namespaces/$nn, create the named network namespace when
# $create is true, then connect the namespace to the default one with a
# veth pair and masquerade its traffic.
# Globals:   nn, create, v0, v1 (read); network, found (via find_network);
#            _errcatch_cleanup (written)
# Outputs:   error message on stderr when no network is free
# Returns:   exits 1 when no network is free
#######################################
start() {
  find_network
  if ! $found; then
    echo "$0: error: no open network found" >&2
    exit 1
  fi

  #### begin mount namespace setup ####
  local mnt_dir=/root/mount_namespaces
  mkdir -p "$mnt_dir"
  if ! mountpoint "$mnt_dir" >/dev/null; then
    mount --bind "$mnt_dir" "$mnt_dir"
    mount --make-private "$mnt_dir"
  fi
  if [[ ! -e $mnt_dir/$nn ]]; then
    touch "$mnt_dir/$nn"
  fi
  if ! mountpoint "$mnt_dir/$nn" >/dev/null; then
    # documentation on propagation is a bit weird because it
    # confusingly talks about binds, namespaces, and mirrors (which
    # seems to be just another name for bind), shared subtrees
    # (which seems to be a term for binds and namespaces), and does not
    # properly specify whether the documentation applies to binds,
    # namespaces, or both. Notably, propagation for binds is marked
    # on the original mount point, and propagation for a mount
    # namespace is marked on mounts within the namespace. Here, we
    # specify that we want mount changes propagated to us, but not
    # back.
    #
    # "true" is given as the program to run: without a program, unshare
    # starts $SHELL, which blocks on an interactive shell when this script
    # is run from a terminal. The namespace stays alive through the bind
    # mount on $mnt_dir/$nn after the program has exited.
    unshare --propagation slave --mount="$mnt_dir/$nn" true
  fi
  #### end mount namespace setup ####

  if $create; then
    ip netns add "$nn"
    ip -n "$nn" link set dev lo up
  fi

  # Turn on IPv4 forwarding; written from inside the default network
  # namespace via dexec.
  echo 1 | dexec dd of=/proc/sys/net/ipv4/ip_forward 2>/dev/null

  # From here on a failure should undo the steps below. This variable is
  # presumably read by errhandle's errcatch (not visible in this file).
  _errcatch_cleanup=stop
  # Create the veth pair inside the namespace, then move one end out.
  ipnn link add "$v0" type veth peer name "$v1"
  ipnn link set "$v0" netns default
  ipd addr add "$network.1/24" dev "$v0"
  ipd link set "$v0" up
  # Add the masquerade rule only if it is not already present.
  nat -C &>/dev/null || nat -A
  ipnn addr add "$network.2/24" dev "$v1"
  ipnn link set "$v1" up
  ipnn route add default via "$network.1"
}
232
#######################################
# Tear down what start set up. Every step is guarded by an existence check,
# so stop can be run again (or after a partial start) without aborting on
# pieces that are already gone.
# Globals:   nn, create, v0 (read); existing (via find_network)
#######################################
stop() {
  if ipd link list "$v0" &>/dev/null; then
    # this also deletes $v1 and the route we added.
    ipd link del "$v0"
  fi
  find_network
  # Remove the masquerade rule only when find_network reports that no other
  # network still needs it, and only if the rule is actually present.
  if ! $existing; then
    if nat -C &>/dev/null; then nat -D; fi
  fi
  # Delete the named namespace only if it is still there; named namespaces
  # are looked up under /run/netns, the same directory this script uses for
  # the "default" entry.
  if $create && [[ -e /run/netns/$nn ]]; then
    ip netns del "$nn"
  fi
  if mountpoint "/root/mount_namespaces/$nn" >/dev/null; then
    umount "/root/mount_namespaces/$nn"
  fi
}
249
# Dispatch: the action name doubles as the name of the function to call.
case $action in
  start|stop)
    "$action"
    ;;
  *)
    # Diagnostics belong on stderr so they are not mixed into stdout.
    echo "$0: error: unsupported action" >&2
    exit 1
    ;;
esac