fix parsing ip on more kinds of networks
[newns] / newns
1 #!/bin/bash
2 # Copyright (C) 2017 Ian Kelling
3
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
7
8 # http://www.apache.org/licenses/LICENSE-2.0
9
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
15
16
# Everything below needs root: re-execute this script through sudo, keeping
# the caller's environment (-E), unless we already run as uid 0.
[[ $EUID == 0 ]] || exec sudo -E "$BASH_SOURCE" "$@"

# Locate the optional errhandle helpers. A caller-supplied ERRHANDLE_PATH
# wins; otherwise use the "errhandle" directory that sits next to the
# resolved directory containing this script.
if [[ ! $ERRHANDLE_PATH ]]; then
  ERRHANDLE_PATH=$(readlink -f "${BASH_SOURCE}")
  # Quoted so that a script path containing whitespace is not word-split.
  ERRHANDLE_PATH=$(readlink -f "${ERRHANDLE_PATH%/*}/../errhandle")
fi
err_sourced=true
# Brace expansion still happens with the variable part quoted.
for p in "$ERRHANDLE_PATH"/{errcatch-function,bash-trace-function}; do
  if [[ -e $p ]]; then
    source "$p"
  else
    err_sourced=false
  fi
done
if $err_sourced; then
  # Provided by the sourced errcatch-function file.
  errcatch
else
  # Fallback when the helpers are missing: abort on the first failing
  # command and report where it happened.
  set -eE -o pipefail
  trap 'echo "$0:$LINENO:error: \"$BASH_COMMAND\" returned $?" >&2' ERR
fi
37
# Print the help text and exit.
# Arguments: $1 - exit status (default 0).
# Outputs:   the help text on stdout when the status is 0 (help was asked
#            for), on stderr otherwise (help shown because of a usage error),
#            so error output never ends up in captured stdout.
# Returns:   never returns; exits with the given status.
usage() {
  local status=${1:-0}
  local fd=1
  if [[ $status != 0 ]]; then
    fd=2
  fi
  cat >&"$fd" <<EOF
usage: ${0##*/} [OPTS] start|stop NS_NAME
Nat a network namespace. create a mount ns. systemd friendly

-c, --create  Create a named network namespace. When running from
              the same network namespace as pid 1, this is set automatically.
              This is the case when running outside a systemd created
              private network.
-n NETWORK    x.x.x /24 private network to use. If not specified, uses
              the first one starting at 10.173.1
-h, --help    Show this help and exit.

From within a systemd network namespace, nat it to the outside. This
would be called from ExecStartPre, and or subsequent units called with
JoinsNamespaceOf= and PrivateNetwork=true.

Also create a named mount namespace under /root/mount_namespaces, so we
can alter some system config for this namespace. Subsequent systemd
command lines would be prefixed with:

/usr/bin/nsenter --mount=/root/mount_namespaces/NS_NAME

Note, this means that they can't run as unpriveledged users, but once
systemd 233 comes out, it will have a bind mount option from within unit
files, so the mount namespace won't be needed for most use cases, and I
will update the script to that the mount namespace not created unless a
flag is passed in. Patch welcome to add that flag before then.

A recommmended dependency of this script is my other repo named "errhandle",
which prints stack trace on error, and calls a cleanup function:
https://iankelling.org/git/?p=errhandle, set ERRHANDLE_PATH, or put it
in a directory adjacent to the absolute, resolved directory this file is
in.

Background:

This script does not make the namespace be named like ip does, because
the naming is not necessary, although it could have been done with some
more work. For debugging and joining the namespace with a bash shell, I
use nsenter -n -m -t \$(pgrep PROCESS_IN_NAMESPACE) bash. Note: if I
knew how to easily ask systemd what pid a unit has, i would do that.

"ip netns new ..." also does a mount namespace, then bind
mounts each file/dir in /etc/netns/NS_NAME to /etc/NS_NAME. Note,
for openvpn having it's own resolv.conf by using it's user script which
calls resolvconf, this doesn't help much. What we actually want to do is
copy /run/resolvconf somehwere then bind mount it on top of
/run/resolvconf.

Please email me if you have a patches, bugs, feedback, or republish this
somewhere else: Ian Kelling <ian@iankelling.org>.
EOF
  exit "$status"
}
93
94
#### begin arg parsing ####
create=false
# The script re-executes itself with "sudo -E", which keeps the caller's
# environment; clear network so only an explicit -n can select it.
network=
# -n makes getopt report parse errors under this script's name, -o/-l list
# the short and long options, and -- ends getopt's own options.
temp=$(getopt -n "${0##*/}" -o hcn: -l help,create -- "$@") || usage 1
# getopt emits a shell-quoted argument list; eval re-parses it into $@.
eval set -- "$temp"
while true; do
  case $1 in
    -c|--create) create=true; shift ;;
    -n) network=$2; shift 2 ;;
    -h|--help) usage ;;
    --) shift; break ;;
    # Unreachable unless the option lists above and this case disagree.
    *) echo "$0: Internal error!" >&2; exit 1 ;;
  esac
done
# Exactly two operands are required: the action and the namespace name.
if (( $# != 2 )); then
  usage 1
fi

action=$1
nn=$2 # namespace name
#### end arg parsing ####
115
#### begin sanity checking ####
# Check every required tool before exiting, so that all missing packages
# are reported in a single run. Messages are diagnostics, so they go to
# stderr.
install_error=false
if ! type -p ip &>/dev/null; then
  echo "please install the iproute2 package" >&2
  install_error=true
fi
if ! type -p iptables &>/dev/null; then
  echo "please install the iptables package" >&2
  install_error=true
fi
if $install_error; then
  exit 1
fi
#### end sanity checking ####
130
131
# veth pair used for this namespace: start() moves v0 into the default
# namespace and leaves v1 in the namespace being set up.
v0="veth0-$nn"
v1="veth1-$nn"
# First two octets of the /24 networks that find_network scans.
ip_base=10.173

# Sharing pid 1's network namespace means no private namespace exists yet,
# so behave as if --create had been given.
if ! $create; then
  if [[ $(readlink /proc/self/ns/net) == "$(readlink /proc/1/ns/net)" ]]; then
    create=true
  fi
fi

# make the default network namespace be named
target=/run/netns/default
if ! [[ -e $target || -L $target ]]; then
  mkdir -p /run/netns
  ln -s /proc/1/ns/net "$target"
fi
146
147
# ipd ARGS...: run ip against the network namespace named "default".
ipd() {
  ip -n default "$@"
}

# ipnn ARGS...: run ip against the namespace being managed.
if $create; then
  # Named namespace: address it by name. nn is quoted so the name always
  # reaches ip as a single argument and is never glob-expanded.
  ipnn() {
    ip -n "$nn" "$@"
  }
else
  # we are already in the network namespace and it's unnamed.
  ipnn() {
    ip "$@"
  }
fi

# dexec CMD ARGS...: run a command inside the "default" network namespace.
dexec() {
  ip netns exec default "$@"
}
156
157
# Interface that carries the default route of the default namespace.
# background: head -n1 is defensive. Not sure if there is some weird feature
# for 2 routes to be 0/0.
# sed -n with the p flag prints only when a "dev IFACE" token was found, so
# a route line without one yields an empty value instead of the raw line.
gateway_if=$(ipd route list exact 0/0 | head -n1 \
  | sed -rn 's/.*dev\s+(\S+).*/\1/p')

# nat OP: apply iptables operation OP (-C check, -A append, -D delete) to
# the masquerade rule for traffic leaving through $gateway_if.
# Returns: non-zero, with a message on stderr, when no default-route
#          interface is known; otherwise the status of iptables.
nat() {
  if [[ ! $gateway_if ]]; then
    # Without this guard iptables would be handed "-o -j ...".
    echo "$0: error: could not determine the default route interface" >&2
    return 1
  fi
  dexec iptables -t nat "$1" POSTROUTING -o "$gateway_if" -j MASQUERADE \
    -m comment --comment "systemd network namespace nat"
}
163
# Pick the /24 network (first three octets) to use for the namespace.
# Globals:
#   network  (read/written) left untouched when already set, e.g. by -n;
#            otherwise set to the first $ip_base.N (N = 1..254) that no
#            address in the default namespace belongs to.
#   found    (written) true when a free network was found. Not modified
#            when network was already set.
#   existing (written) true when at least one scanned network was in use.
#            Not modified when network was already set.
#   ip_base  (read)
find_network() {
  local i ips
  if [[ $network ]]; then
    return
  fi
  found=false
  existing=false
  # One address per line, e.g. 10.173.1.1/24
  ips=$(ipd addr show | awk '$1 == "inet" {print $2}')
  for ((i = 1; i <= 254; i++)); do
    network=$ip_base.$i
    # Match "$network." at the start of a line. The trailing dot matters:
    # without it 10.173.1 would also match 10.173.10.x, 10.173.100.x, ...
    if [[ $'\n'$ips == *$'\n'"$network".* ]]; then
      existing=true
    else
      found=true
      break
    fi
  done
}
181
# Set up the mount namespace, the veth pair and the NAT rule for $nn.
# Globals: nn, v0, v1, create, network, found (read); _errcatch_cleanup
#          (written).
# Exits with status 1, message on stderr, when no free network exists.
start() {
  local ns_root=/root/mount_namespaces
  local ns_file=$ns_root/$nn

  find_network
  # find_network leaves found untouched when the network was given with -n,
  # so only an explicit "false" means the scan failed.
  if [[ ${found:-} == false ]]; then
    echo "$0: error: no open network found" >&2
    exit 1
  fi

  #### begin mount namespace setup ####
  mkdir -p "$ns_root"
  if ! mountpoint "$ns_root" >/dev/null; then
    mount --bind "$ns_root" "$ns_root"
  fi
  # note: This is outside the mount condition because I've mysteriously
  # had this become shared instead of private, perhaps it
  # got remounted somehow and lost the setting.
  mount --make-private "$ns_root"
  if [[ ! -e $ns_file ]]; then
    touch "$ns_file"
  fi
  if ! mountpoint "$ns_file" >/dev/null; then
    # documentation on propagation is a bit weird because it
    # confusingly talks about binds, namespaces, and mirrors (which
    # seems to be just another name for bind), shared subtrees
    # (which seems to a term for binds and namespaces), and does not
    # properly specify whether the documentation applies to binds,
    # namespaces, or both. Notably, propagation for binds is marked
    # on the original mount point, and propagation for a mount
    # namespace is marked on mounts within the namespace. Here, we
    # specify that we want mount changes propagated to us, but not
    # back.
    unshare --propagation slave --mount="$ns_file"
  fi
  #### end mount namespace setup ####

  if $create; then
    ip netns add "$nn"
    ip -n "$nn" link set dev lo up
  fi

  # Enable IPv4 forwarding in the default namespace.
  echo 1 | dexec dd of=/proc/sys/net/ipv4/ip_forward 2>/dev/null

  # NOTE(review): presumably read by the errhandle helpers so that stop is
  # run if a later command fails -- confirm against errcatch-function.
  _errcatch_cleanup=stop
  # v0 goes to the default namespace as the gateway (.1); v1 stays in the
  # namespace (.2) and routes everything through v0.
  ipnn link add "$v0" type veth peer name "$v1"
  ipnn link set "$v0" netns default
  ipd addr add "$network.1/24" dev "$v0"
  ipd link set "$v0" up
  # Add the masquerade rule only if it is not already present.
  nat -C &>/dev/null || nat -A
  ipnn addr add "$network.2/24" dev "$v1"
  ipnn link set "$v1" up
  ipnn route add default via "$network.1"
}
235
# Tear down what start created for $nn: the veth pair, the NAT rule (when
# no other scanned network is in use), the named network namespace (when we
# created it) and the mount namespace file's mount.
# Globals: nn, v0, create, existing (read).
stop() {
  local ns_file=/root/mount_namespaces/$nn

  if ipd link list "$v0" &>/dev/null; then
    # this also deletes $v1 and the route we added.
    ipd link del "$v0"
  fi
  find_network
  # Remove the NAT rule only when the scan positively reported that no
  # scanned network is in use. find_network leaves existing untouched when
  # the network was given with -n; in that case the rule is kept.
  if [[ ${existing:-} == false ]]; then
    if nat -C &>/dev/null; then nat -D; fi
  fi
  if $create; then
    ip netns del "$nn"
  fi
  if mountpoint "$ns_file" >/dev/null; then
    umount "$ns_file"
  fi
}
252
# Run the requested action; start and stop are the functions of the same
# name defined above.
case "$action" in
  start|stop)
    "$action"
    ;;
  *)
    # Diagnostics go to stderr.
    echo "$0: error: unsupported action" >&2
    exit 1
    ;;
esac