small readme update
[newns] / newns
1 #!/bin/bash
2 # Copyright (C) 2017 Ian Kelling
3
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
7
8 # http://www.apache.org/licenses/LICENSE-2.0
9
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
15
16
# Everything below needs root: re-execute this script through sudo when we
# are not already root. -E keeps the caller's environment.
[[ $EUID == 0 ]] || exec sudo -E "${BASH_SOURCE[0]}" "$@"

# Locate the optional "errhandle" helpers. Unless ERRHANDLE_PATH is already
# set, look for a directory named "errhandle" next to the resolved directory
# this script lives in. Paths are quoted so that directories containing
# whitespace or glob characters still work.
if [[ ! $ERRHANDLE_PATH ]]; then
  ERRHANDLE_PATH=$(readlink -f "${BASH_SOURCE[0]}")
  ERRHANDLE_PATH=$(readlink -f "${ERRHANDLE_PATH%/*}/../errhandle")
fi

# Source both helper files; remember whether any of them was missing.
err_sourced=true
for p in "$ERRHANDLE_PATH"/{errcatch-function,bash-trace-function}; do
  if [[ -e $p ]]; then
    source "$p"
  else
    err_sourced=false
  fi
done

if $err_sourced; then
  # Provided by the sourced errcatch-function file.
  errcatch
else
  # Fallback error handling when errhandle is not available: abort on the
  # first failing command and report where it happened.
  set -eE -o pipefail
  trap 'echo "$0:$LINENO:error: \"$BASH_COMMAND\" returned $?" >&2' ERR
fi
37
# Print the help text and exit.
# Arguments: $1 - exit status (default 0).
# Outputs:   the help text, on stdout when the status is 0 (help was asked
#            for) and on stderr otherwise (usage error).
usage() {
  local status=${1:-0}
  local fd=1
  [[ $status == 0 ]] || fd=2
  cat >&"$fd" <<EOF
usage: ${0##*/} [OPTS] start|stop NS_NAME
Nat a network namespace. systemd friendly

Also creates a mount namespace with a cloned /etc/resolv.conf.

-c, --create  Create a named network namespace. When running from
              the same network namespace as pid 1, this is set automatically.
              This is the case when running outside a systemd created
              private network.
-n NETWORK    x.x.x /24 private network to use. If not specified, uses
              the first one starting at 10.173.1
-h, --help    Show this help and exit.

From within a systemd network namespace, nat it to the outside. This
would be called from ExecStartPre, and/or subsequent units called with
JoinsNamespaceOf= and PrivateNetwork=true.

Also create a named mount namespace under /root/mount_namespaces, so we
can alter some system config for this namespace. Subsequent systemd
command lines would be prefixed with:

/usr/bin/nsenter --mount=/root/mount_namespaces/NS_NAME

Note, this means that they can't run as unprivileged users, but once
systemd 233 comes out, it will have a bind mount option from within unit
files, so the mount namespace won't be needed for most use cases, and I
will update the script so that the mount namespace is not created unless a
flag is passed in. Patches welcome to add that flag before then.

A recommended dependency of this script is my other repo named "errhandle",
which prints stack trace on error, and calls a cleanup function:
https://iankelling.org/git/?p=errhandle, set ERRHANDLE_PATH, or put it
in a directory adjacent to the absolute, resolved directory this file is
in.

Background:

This script does not make the namespace be named like ip does, because
the naming is not necessary, although it could have been done with some
more work. For debugging and joining the namespace with a bash shell, I
use nsenter -n -m -t \$(pgrep PROCESS_IN_NAMESPACE) bash. Note: if I
knew how to easily ask systemd what pid a unit has, I would do that.

"ip netns new ..." also does a mount namespace, then bind
mounts each file/dir in /etc/netns/NS_NAME to /etc/NS_NAME. Note,
for openvpn having its own resolv.conf by using its user script which
calls resolvconf, this doesn't help much. What we actually want to do is
copy /run/resolvconf somewhere then bind mount it on top of
/run/resolvconf.

Please email me if you have patches, bugs, feedback, or republish this
somewhere else: Ian Kelling <ian@iankelling.org>.
EOF
  exit "$status"
}
95
96
#### begin arg parsing ####
create=false
# Start from a known-empty value: this script re-executes itself with
# "sudo -E", so a stray "network" variable in the caller's environment
# would otherwise silently select the subnet.
network=
# -n makes getopt report parse errors under this script's name.
temp=$(getopt -n "${0##*/}" -o hcn: -l help,create -- "$@") || usage 1
# getopt emits a shell-quoted, normalized argument list; load it back into
# the positional parameters.
eval set -- "$temp"
while true; do
  case "$1" in
    -c|--create) create=true; shift ;;
    -n) network=$2; shift 2 ;;
    -h|--help) usage ;;
    --) shift; break ;;
    *) echo "$0: Internal error!" >&2; exit 1 ;;
  esac
done
# Exactly two positional arguments are required: the action and the name.
if (( $# != 2 )); then
  usage 1
fi

action=$1
nn=$2 # namespace name
#### end arg parsing ####
117
#### begin sanity checking ####
# Check for every required external tool before failing, so the user sees
# all missing packages in one run. Diagnostics go to stderr.
install_error=false
if ! type -p ip &>/dev/null; then
  echo "please install the iproute2 package" >&2
  install_error=true
fi
if ! type -p iptables &>/dev/null; then
  echo "please install the iptables package" >&2
  install_error=true
fi
if $install_error; then
  exit 1
fi
#### end sanity checking ####
132
133
# First two octets of the private /24 networks that find_network hands out.
ip_base=10.173
# Names of the two ends of the veth pair used for this namespace.
v0="veth0-$nn"
v1="veth1-$nn"

# When we share the network namespace of pid 1 there is no private
# namespace yet, so behave as if --create had been passed.
if [[ $create == false && $(readlink /proc/self/ns/net) == "$(readlink /proc/1/ns/net)" ]]; then
  create=true
fi

# make the default network namespace be named
target=/run/netns/default
if ! [[ -e $target || -L $target ]]; then
  mkdir -p /run/netns
  ln -s /proc/1/ns/net "$target"
fi
148
149
# Run an ip(8) subcommand inside the network namespace named "default".
ipd() { ip -n default "$@"; }

# Run an ip(8) subcommand inside the target namespace. Which definition is
# used is decided once, here, from $create.
if $create; then
  # The namespace is a named one; address it by name. $nn is quoted so it
  # is always passed as a single argument.
  ipnn() { ip -n "$nn" "$@"; }
else
  # we are already in the network namespace and it's unnamed.
  ipnn() { ip "$@"; }
fi

# Execute an arbitrary command inside the "default" network namespace.
dexec() { ip netns exec default "$@"; }
158
159
# Name of the interface that carries the default route in the default
# network namespace.
# background: head -n1 is defensive. Not sure if there is some weird feature
# for 2 routes to be 0/0.
gateway_if=$(ipd route list exact 0/0 | head -n1 | sed -r 's/.*dev\s+(\S+).*/\1/')

# Check, add or delete the MASQUERADE rule for traffic leaving through
# $gateway_if, run inside the default network namespace.
# Arguments: $1 - iptables rule command; the callers in this file pass
#                 -C, -A or -D.
# Returns:   iptables' exit status, or 1 (with a message on stderr) when no
#            default-route interface was detected, since no valid rule can
#            be built without one.
nat() {
  if [[ ! $gateway_if ]]; then
    echo "$0: error: no default route interface found in the default network namespace" >&2
    return 1
  fi
  dexec iptables -t nat "$1" POSTROUTING -o "$gateway_if" -j MASQUERADE \
    -m comment --comment "systemd network namespace nat"
}
165
# Choose the /24 network prefix (three octets, e.g. 10.173.1) to use.
# Globals:
#   network  (read/written) left alone when already non-empty, otherwise
#            set to the first $ip_base.N (N = 1..254) that has no address
#            in the default network namespace.
#   found    (written) true when a usable network is in $network.
#   existing (written) true when at least one $ip_base.N network tried
#            before the chosen one is already in use.
#   ip_base  (read)
# When $network was already set (given with -n, or chosen by an earlier
# call) nothing is scanned; found/existing keep their values, and default
# to true if they were never set. That is how the callers' "if ! \$found" /
# "if ! \$existing" tests already behaved with the variables unset.
find_network() {
  local i ips
  if [[ $network ]]; then
    : "${found:=true}" "${existing:=true}"
    return
  fi
  found=false
  existing=false
  ips=$(ipd addr show | awk '$1 == "inet" {print $2}')
  for ((i=1; i <= 254; i++)); do
    network=$ip_base.$i
    # The trailing "\." anchors the match to a whole third octet, so that
    # e.g. 10.173.10.1 is not mistaken for an address inside 10.173.1.
    if grep -q "^${network//./\\.}\\." <<<"$ips"; then
      existing=true
    else
      found=true
      break
    fi
  done
}
183
# Set up the mount namespace, the (optionally created) network namespace,
# the veth pair, addresses, default route and NAT rule for namespace $nn.
# Globals: nn, v0, v1, create (read); network, found (set via
#          find_network); _errcatch_cleanup (written).
# Exits with status 1 when no free network could be found.
start() {
  local ns_dir=/root/mount_namespaces
  local ns_file=$ns_dir/$nn

  find_network
  if ! $found; then
    echo "$0: error: no open network found" >&2
    exit 1
  fi

  #### begin mount namespace setup ####
  mkdir -p "$ns_dir"
  if ! mountpoint "$ns_dir" >/dev/null; then
    mount --bind "$ns_dir" "$ns_dir"
  fi
  # note: This is outside the mount condition because I've mysteriously
  # had this become shared instead of private, perhaps it
  # got remounted somehow and lost the setting.
  mount --make-private "$ns_dir"
  if [[ ! -e $ns_file ]]; then
    touch "$ns_file"
  fi
  if ! mountpoint "$ns_file" >/dev/null; then
    # documentation on propagation is a bit weird because it
    # confusingly talks about binds, namespaces, and mirrors (which
    # seems to be just another name for bind), shared subtrees
    # (which seems to be a term for binds and namespaces), and does not
    # properly specify whether the documentation applies to binds,
    # namespaces, or both. Notably, propagation for binds is marked
    # on the original mount point, and propagation for a mount
    # namespace is marked on mounts within the namespace. Here, we
    # specify that we want mount changes propagated to us, but not
    # back.
    unshare --propagation slave --mount="$ns_file"
  fi
  #### end mount namespace setup ####

  if $create; then
    ip netns add "$nn"
    ip -n "$nn" link set dev lo up
  fi

  # Enable IPv4 forwarding as seen from the default network namespace.
  echo 1 | dexec dd of=/proc/sys/net/ipv4/ip_forward 2>/dev/null

  # From here on there is state to undo; the variable names stop as the
  # cleanup function (presumably consumed by the errhandle error handler).
  _errcatch_cleanup=stop
  # Create the veth pair in the target namespace, then move one end ($v0)
  # into the default namespace where it becomes the gateway ($network.1).
  ipnn link add "$v0" type veth peer name "$v1"
  ipnn link set "$v0" netns default
  ipd addr add "$network.1/24" dev "$v0"
  ipd link set "$v0" up
  # Add the NAT rule only if it is not already present.
  nat -C &>/dev/null || nat -A
  ipnn addr add "$network.2/24" dev "$v1"
  ipnn link set "$v1" up
  ipnn route add default via "$network.1"
}
237
# Undo what start set up for namespace $nn: the veth pair, the NAT rule
# (only when find_network reports no other network in use), the named
# network namespace (only when we created it) and the mount namespace bind.
# Globals: nn, v0, create (read); existing (set via find_network).
stop() {
  local ns_file=/root/mount_namespaces/$nn

  if ipd link list "$v0" &>/dev/null; then
    # this also deletes $v1 and the route we added.
    ipd link del "$v0"
  fi
  find_network
  if ! $existing; then
    # Delete the rule only if it is actually there.
    if nat -C &>/dev/null; then nat -D; fi
  fi
  if $create; then
    ip netns del "$nn"
  fi
  if mountpoint "$ns_file" >/dev/null; then
    umount "$ns_file"
  fi
}
254
# Dispatch to the function named after the requested action; anything other
# than start or stop is rejected with a diagnostic on stderr.
case "$action" in
  start|stop)
    "$action"
    ;;
  *)
    echo "$0: error: unsupported action" >&2
    exit 1
    ;;
esac