46887e9712d1aa558fb08bcab3be976a4fce6723
[automated-distro-installer] / fai / config / hooks / partition.DEFAULT
1 #!/bin/bash -x
2 # Copyright (C) 2016 Ian Kelling
3
4 # This program is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU General Public License
6 # as published by the Free Software Foundation; either version 2
7 # of the License, or (at your option) any later version.
8
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17
# Strict mode: stop at the first failing command (-e), let functions and
# subshells inherit the ERR trap (-E), and make a pipeline fail when any
# of its stages fails.
set -e -E -o pipefail
# Report the failing command, its location and its exit status on stderr.
trap 'echo "$0:$LINENO:error: \"$BASH_COMMAND\" returned $?" >&2' ERR

# fai's setup-storage won't do btrfs on luks, so we do it ourselves :)
# Inspiration taken from files in the fai-setup-storage package.

# When skiptask is not available (running outside of fai), the failure
# is tolerated and `type` just reports that the command is missing.
skiptask partition || ! type skiptask # for running not in fai

#### begin configuration

# Partition numbers, as they appear in the partition table.
rootn=1
swapn=2
bootn=3
# ext partition so grub can write persistent variables,
# so it can do a one time boot.
grub_extn=4
# bios boot partition,
# https://wiki.archlinux.org/index.php/GRUB
bios_grubn=5
# Highest partition number in use; loops over all partitions stop here.
lastn=$bios_grubn
# Total space for /boot in MiB, shared across all boot devices.
boot_mib=10000


##### end configuration
41
42
43 ##### end configuration
44
45
# Print the device path of a partition on a disk.
#
# Usage:
#   add-part PARTNUM        partition PARTNUM of the disk in global $dev
#   add-part DISK PARTNUM   partition PARTNUM of DISK
#
# Disks named under /dev/disk/by-id/ get a "-partN" suffix; any other
# disk path just gets the number appended (/dev/sda -> /dev/sda1).
# Globals: dev (read, only in the one-argument form)
# Outputs: the partition path on stdout
add-part() {
  # All working variables are local so that calling this helper never
  # clobbers a caller's variables.
  local disk partnum
  if (( $# == 1 )); then
    disk=$dev
    partnum=$1
  else
    disk=$1
    partnum=$2
  fi
  if [[ $disk == /dev/disk/by-id/* ]]; then
    printf '%s\n' "$disk-part$partnum"
  else
    printf '%s\n' "$disk$partnum"
  fi
}
62
# Print the path of one specific partition. Each helper takes an optional
# DISK argument and forwards it to add-part together with the matching
# partition number; without an argument add-part falls back to $dev.
# "$@" is quoted so that the optional argument is passed through intact.
bootdev() { add-part "$@" "$bootn"; }
rootdev() { add-part "$@" "$rootn"; }
swapdev() { add-part "$@" "$swapn"; }
grub_extdev() { add-part "$@" "$grub_extn"; }
67 # Commented because it's not used, but left because it
68 # finishes the pattern and if we ever do need to use it, it's here.
69 #bios_grubdev() { add-part $@ $bios_grubn; }
70
# Naming helpers for the dm-crypt mappings.
#
# crypt-name PATH   prints the mapping name: "crypt_dev_" followed by the
#                   last path component of PATH.
# crypt-dev PATH    prints the same name under /dev/mapper/.
# The root-*/swap-* variants take an optional DISK argument, resolve the
# root or swap partition of that disk (or of $dev when omitted) via
# rootdev/swapdev, and print the corresponding mapping name or path.
crypt-dev() { printf '%s\n' "/dev/mapper/crypt_dev_${1##*/}"; }
crypt-name() { printf '%s\n' "crypt_dev_${1##*/}"; }
root-cryptdev() { crypt-dev "$(rootdev "$@")"; }
swap-cryptdev() { crypt-dev "$(swapdev "$@")"; }
root-cryptname() { crypt-name "$(rootdev "$@")"; }
swap-cryptname() { crypt-name "$(swapdev "$@")"; }
77
78
79 ##### end function defs
80
# $partition is used as a command ("true"/"false") further down to decide
# whether the disks get wiped and repartitioned.
if ifclass REPARTITION; then
  partition=true # force a full wipe
else
  partition=false # change to true to force a full wipe
fi

# Sort every sdX/vdX disk into ssds or hdds based on the kernel's
# queue/rotational flag (0 = non-rotational, 1 = rotational).
hdds=()
ssds=()
cd /sys/block
for disk in [sv]d[a-z]; do
  # Without nullglob an unmatched pattern stays literal; skip it so the
  # "no disks" case is reported clearly below.
  [[ -e $disk ]] || continue
  rotational=$(cat "$disk/queue/rotational")
  case "$rotational" in
    0) ssds+=("/dev/$disk") ;;
    1) hdds+=("/dev/$disk") ;;
    *)
      echo "$0: error: unknown /sys/block/$disk/queue/rotational: $rotational" >&2
      exit 1
      ;;
  esac
done

# install all ssds, or if there are none, all hdds
if ! ifclass ROTATIONAL && (( ${#ssds[@]} > 0 )); then
  short_devs=("${ssds[@]}")
else
  short_devs=("${hdds[@]}")
fi

# Everything below divides by the number of devices, so an empty list
# must be a hard error here rather than a division by zero later.
if (( ${#short_devs[@]} == 0 )); then
  echo "$0: error: no disks found to install to" >&2
  exit 1
fi
107
# Check whether every disk already has exactly the expected partitions.
# If any disk does not, switch to a full repartition. (A filesystem type
# check via blkid used to be here; it is kept below, commented out.)
#blkid="$(blkid -s TYPE)"
for dev in "${short_devs[@]}"; do
  if $partition; then break; fi
  real_dev=$(readlink -f "$dev")
  # Single-digit partition nodes of this disk. With no match the pattern
  # stays literal (nullglob is not enabled yet), giving a count of 1,
  # which never equals $lastn, so repartitioning is still triggered.
  existing_parts=("$real_dev"[0-9])
  [[ ${#existing_parts[@]} == "$lastn" ]] || partition=true
  for (( i=1; i <= lastn; i++ )); do
    [[ -e $dev$i ]] || partition=true
  done
  # On one system, blkid is missing some partitions.
  # maybe we need a flag, like FUZZY_BLKID or something, so we
  # can check that at least some exist.
  # for x in "`rootdev`: TYPE=\"crypto_LUKS\"" "`bootdev`: TYPE=\"btrfs\""; do
  #   echo "$blkid" | grep -Fx "$x" &>/dev/null || partition=true
  # done
done
125
# Give the operator a chance to abort before anything destructive happens.
if $partition && ifclass PARTITION_PROMPT; then
  echo "Press any key except ctrl-c to continue and partition these drives:"
  echo " ${short_devs[*]}"
  read -r
fi

# Translate each short device name through the external devbyid helper
# (presumably into a stable /dev/disk/by-id/ path; it is defined outside
# this file).
devs=()
shopt -s extglob
for short_dev in "${short_devs[@]}"; do
  # Word splitting of the helper's output is intentional here.
  by_id=($(devbyid "$short_dev"))
  # An empty answer would silently drop the disk and leave devs and
  # short_devs out of step, so treat it as fatal.
  if (( ${#by_id[@]} == 0 )); then
    echo "$0: error: devbyid returned nothing for $short_dev" >&2
    exit 1
  fi
  devs+=("${by_id[@]}")
done
137
138
# Collect the boot partitions that will form the /boot filesystem and
# remember the grub_ext partition of the first disk that contributes one.
#
# $first is run as a command ("true"/"false"). It must start out true and
# be tested by executing it: a [[ $first ]] string test is always true for
# the non-empty word "false", which made first_grub_extdev end up pointing
# at the last disk instead of the first.
first=true
boot_devs=()
for dev in "${devs[@]}"; do
  if ifclass frodo; then
    # I ran into a machine where the bios doesn't know about some disks,
    # so 1st stage of grub also doesn't know about them.
    # Also, grub does not support mounting degraded btrfs as far as
    # I can tell with some googling.
    # From within an arch install env, I could detect them by noting
    # their partitions were mixed with the next disk in /dev/disk/by-path,
    # and I have mixed model disks, and I could see the 8 models which showed
    # up in the bios, and thus see which 2 models were missing.
    # hdparm -I /dev/sdh will give model info in linux.
    # However, in fai on jessie, /dev/disk/by-path dir doesn't exist,
    # and I don't see another way, so I'm hardcoding them.
    # We still put grub on them and partition them the same, for uniformity
    # and in case they get moved to a system that can recognize them,
    # we just exclude them from the boot filesystem.
    cd /dev/disk/by-id/
    bad_disk=false
    for id in ata-TOSHIBA_MD04ACA500_8539K4TQFS9A \
      ata-TOSHIBA_MD04ACA500_Y5IFK6IJFS9A; do
      if [[ $(readlink -f "$id") == "$(readlink -f "$dev")" ]]; then
        bad_disk=true
        break
      fi
    done
    $bad_disk || boot_devs+=("$(bootdev)")
  else
    boot_devs+=("$(bootdev)")
  fi
  if $first && (( ${#boot_devs[@]} > 0 )); then
    first_grub_extdev=$(grub_extdev)
    first=false
  fi
done

# The boot partition size is later divided by the number of boot devices,
# and the first one gets mounted, so an empty list cannot work.
if (( ${#boot_devs[@]} == 0 )); then
  echo "$0: error: no usable boot devices found" >&2
  exit 1
fi

if ifclass RAID0 || (( ${#boot_devs[@]} < 4 )); then
  raid_level=0
else
  raid_level=10
  # need double the space if we are raid 10, and then
  # might as well give some extra.
  boot_mib=$((boot_mib * 3))
fi
184
185
186
# Derive DISTRO from the fai classes unless it was already provided.
# The table is checked top to bottom and the first matching class wins.
if [[ -z $DISTRO ]]; then
  for class_distro in \
    STABLE_BOOTSTRAP:debianstable_bootstrap \
    STRETCH64:debiantesting \
    STABLE:debianstable \
    XENIAL64:ubuntuxenial \
    BELENOS64:trisquelbelenos; do
    if ifclass "${class_distro%%:*}"; then
      DISTRO=${class_distro#*:}
      break
    fi
  done
  if [[ -z $DISTRO ]]; then
    echo "PARTITIONER ERROR: no distro class/var set" >&2
    exit 1
  fi
fi
# The boot filesystem is mounted through its first member device.
first_boot_dev=${boot_devs[0]}
204
205
# Create one btrfs filesystem spanning the given devices.
# Globals:   raid_level (read) - 0 for mkfs.btrfs defaults, 10 for raid10
#            data and metadata
# Arguments: the devices that make up the filesystem
# Returns:   mkfs.btrfs's status, or 1 for an unsupported raid_level
bpart() { # btrfs a partition
  case "$raid_level" in
    0) mkfs.btrfs -f "$@" ;;
    10) mkfs.btrfs -f -m raid10 -d raid10 "$@" ;;
    *)
      # Doing nothing here would leave the devices without a filesystem
      # and only fail much later at mount time.
      echo "$0: error: bpart: unsupported raid_level: $raid_level" >&2
      return 1
      ;;
  esac
}
212
213
# keyfiles generated like:
# head -c 2048 /dev/urandom | od | s dd of=/q/root/luks/host-demohost
luks_dir=${LUKS_DIR:-/var/lib/fai/config/distro-install-common/luks}

if [[ ! -e $luks_dir/host-$HOSTNAME ]]; then
  echo "$0: error: no key for hostname at $luks_dir/host-$HOSTNAME" >&2
  exit 1
fi

# Passphrase added to the volume in addition to the per-host keyfile.
# NOTE(review): the script runs with bash -x, so this value shows up in
# the trace output - confirm that this is acceptable for the install logs.
if ifclass tp; then
  lukspw=$(cat "$luks_dir/traci")
else
  lukspw=$(cat "$luks_dir/ian")
fi
if ifclass demohost; then
  lukspw=x
fi

# The swap size below is divided by the number of disks.
if (( ${#devs[@]} == 0 )); then
  echo "$0: error: no devices to install to" >&2
  exit 1
fi

first_root_crypt=$(root-cryptdev "${devs[0]}")

# 1.5 x based on https://access.redhat.com/documentation/en-US/Red_Hat_Enterprise_Linux/7/html/Installation_Guide/sect-disk-partitioning-setup-x86.html#sect-custom-partitioning-x86
# MemTotal is reported in KiB; the result is the per-disk swap size in MiB.
mem_kib=$(awk '$1 == "MemTotal:" {print $2; exit}' /proc/meminfo)
if [[ ! $mem_kib =~ ^[0-9]+$ ]]; then
  echo "$0: error: could not read MemTotal from /proc/meminfo" >&2
  exit 1
fi
swap_mib=$(( mem_kib * 3 / (${#devs[@]} * 2) / 1024 ))

mkdir -p /tmp/fai
root_devs=()
for dev in "${devs[@]}"; do
  root_devs+=("$(rootdev)")
done
shopt -s nullglob
if $partition; then
  for dev in "${devs[@]}"; do
    # if we repartition to the same as an old partition,
    # we don't want any old fses hanging around.
    for (( i=1; i <= lastn; i++ )); do
      x=$(add-part "$dev" "$i")
      [[ -e $x ]] || continue
      count_down=10
      # wipefs has failed, manual run works, google suggests timing issue
      while ! wipefs -a "$x"; do
        sleep 2
        count_down=$((count_down - 1))
        (( count_down > 0 )) || exit 1
      done
    done
  done
  for dev in "${devs[@]}"; do
    # parted will round up the disk size. Do -1 so we can have
    # fully 1MiB unit partitions for easy resizing of the last partition.
    # Otherwise we would pass in -0 for the end argument for the last partition.
    #
    # parted print error output is expected. example:
    # Error: /dev/vda: unrecognised disk label
    # Because of that, a non-zero exit status from parted is tolerated
    # here, but the parsed size itself is validated: an empty result
    # would otherwise turn into -1, which "parted --" interprets as an
    # offset from the end of the disk.
    disk_total_mib=$(parted -m "$dev" unit MiB print | \
      sed -nr "s#^/dev/[^:]+:([0-9]+).*#\1#p" || true)
    if [[ ! $disk_total_mib =~ ^[0-9]+$ ]]; then
      echo "$0: error: could not determine size of $dev" >&2
      exit 1
    fi
    disk_mib=$(( disk_total_mib - 1 ))
    root_end=$(( disk_mib - swap_mib - boot_mib / ${#boot_devs[@]} ))
    swap_end=$(( root_end + swap_mib ))

    parted -s "$dev" mklabel gpt
    # MiB because parted complains about alignment otherwise.
    pcmd=(parted -a optimal -s -- "$dev")
    # Partitions are created in number order (root, swap, boot, grub_ext,
    # bios_grub), not in on-disk order.
    "${pcmd[@]}" mkpart primary "ext3" 12MiB "${root_end}MiB"
    "${pcmd[@]}" mkpart primary "linux-swap" "${root_end}MiB" "${swap_end}MiB"
    "${pcmd[@]}" mkpart primary "" "${swap_end}MiB" "${disk_mib}MiB"
    # i only need a few k, but googling min size,
    # I found someone saying that gparted
    # required at least 8 because of their hard drive cylinder size.
    # And 8 is still very tiny.
    "${pcmd[@]}" mkpart primary "ext2" 4MiB 12MiB
    # gpt ubuntu cloud image uses ~4 mb for this partition. fai uses 1 MiB.
    # so, I use 3, whatever.
    # note: parted manual says cheap flash media
    # should start at 4.
    "${pcmd[@]}" mkpart primary "" 1MiB 4MiB
    "${pcmd[@]}" set "$bios_grubn" bios_grub on
    "${pcmd[@]}" set "$bootn" boot on # generally not needed on modern systems
    # the mkfs failed before on a vm, which prompted me to add
    # sleep .1
    # then it failed again on a physical machine
    # with:
    # Device /dev/disk/by-id/foo doesn't exist or access denied,
    # so I added a wait until it existed.
    # Then I added the mkfs.ext2, which claimed to succeed,
    # but then couldn't be found upon reboot. In that case we didn't
    # wait at all. So I've added a 3 second minimum wait.
    sleep 3
    secs=0
    while [[ ! -e $(rootdev) ]] && (( secs < 10 )); do
      sleep 1
      secs=$((secs +1))
    done
    # Holds just a single file, rarely written, so
    # use ext2, like was often used for the /boot partition.
    # This exists because grub can only persist data to a non-cow fs.
    # And we use persisting a var in grub to do a one time boot.
    # We could pass the data on the kernel command line and persist it
    # to grubenv after booting, but that relies on the boot always succeeding.
    # This is just a bit more robust, and it could work for booting
    # into ipxe which can't persist data, if we ever got that working.
    mkfs.ext2 "$(grub_extdev)"
    # `yes` is killed by SIGPIPE once cryptsetup stops reading; with
    # pipefail that surfaces as status 141, which is accepted as success.
    yes YES | cryptsetup luksFormat "$(rootdev)" "$luks_dir/host-$HOSTNAME" \
      -c aes-cbc-essiv:sha256 -s 256 || [[ $? == 141 ]]
    yes "$lukspw" | \
      cryptsetup luksAddKey --key-file "$luks_dir/host-$HOSTNAME" \
      "$(rootdev)" || [[ $? == 141 ]]
    # background: Keyfile and password are treated just
    # like 2 ways to input a passphrase, so we don't actually need to have
    # different contents of keyfile and passphrase, but it makes some
    # security sense to use a really big randomly generated passphrase
    # as much as possible, so we have both.
    #
    # This would remove the keyfile.
    # yes 'test' | cryptsetup luksRemoveKey /dev/... \
    #   /key/file || [[ $? == 141 ]]

    cryptsetup luksOpen "$(rootdev)" "$(root-cryptname)" \
      --key-file "$luks_dir/host-$HOSTNAME"
  done
  ls -la /dev/btrfs-control # this was probably for debugging...
  sleep 1
  # One btrfs across all opened root mappings, one across the boot partitions.
  root_crypt_devs=()
  for dev in "${devs[@]}"; do
    root_crypt_devs+=("$(root-cryptdev)")
  done
  bpart "${root_crypt_devs[@]}"
  bpart "${boot_devs[@]}"
else
  # Existing layout is kept: only recreate the small grub_ext filesystem
  # and open the existing encrypted root volumes.
  for dev in "${devs[@]}"; do
    mkfs.ext2 "$(grub_extdev)"
    cryptsetup luksOpen "$(rootdev)" "$(root-cryptname)" \
      --key-file "$luks_dir/host-$HOSTNAME"
  done
  sleep 1
fi
345
346
if [[ $DISTRO != debianstable_bootstrap ]]; then
  # bootstrap distro doesn't use separate encrypted root.
  mount -o subvolid=0 "$first_root_crypt" /mnt
  # systemd creates subvolumes we want to delete.
  # Paths are listed in reverse path order so nested subvolumes come
  # before their parents. The sed pattern only matches paths without
  # whitespace, so splitting the output into words is safe.
  nested_subvols=($(btrfs subvolume list --sort=-path /mnt |
    sed -rn "s#^.*path\s*(root_$DISTRO/\S+)\s*\$#\1#p"))
  for subvol in "${nested_subvols[@]}"; do
    btrfs subvolume delete "/mnt/$subvol"
  done
  btrfs subvolume set-default 0 /mnt
  [[ ! -e /mnt/root_$DISTRO ]] || btrfs subvolume delete "/mnt/root_$DISTRO"

  ## create subvols ##
  cd /mnt

  btrfs subvolume create "root_$DISTRO"
  # q is kept across reinstalls; only create it when missing.
  [[ -e q ]] || btrfs subvolume create q
  chown root:1000 q

  mkdir -p "/mnt/root_$DISTRO/boot"
  for x in q/a q/i; do
    mkdir -p "$x"
    chown 1000:1000 "$x"
    chmod 755 "$x"
  done
  # could set default like this, but no reason to.
  # btrfs subvolume set-default \
  #   $(btrfs subvolume list . | grep "root_$DISTRO$" | awk '{print $2}') .

  # no cow on the root filesystem. its setup is fully scripted,
  # (immutable in buzzwords). if it messes up, we will just recreate it,
  # and we can get better perf with this.
  # I can't remember exactly why, but this is preferable to mounting with
  # -o nodatacow, I think because subvolumes inherit that.
  chattr -Rf +C "root_$DISTRO"
  cd /
  umount /mnt
fi
383
mount -o subvolid=0 "$first_boot_dev" /mnt
cd /mnt
btrfs subvolume set-default 0 /mnt # already default, just ensuring it.

# for libreboot systems.
mkdir -p /mnt/grub2
# Fall back to the standard fai config location when $FAI is not set
# (running outside of fai); this matches the default used for luks_dir.
cp "${FAI:-/var/lib/fai/config}/distro-install-common/libreboot_grub.cfg" \
  /mnt/grub2

if [[ $DISTRO == debianstable_bootstrap ]]; then
  # this is just convenience for the libreboot_grub config
  # so we can glob the other ones easier.
  boot_vol=$DISTRO
else
  boot_vol=boot_$DISTRO
fi
# Always start from a fresh boot subvolume.
[[ ! -e /mnt/$boot_vol ]] || btrfs subvolume delete "/mnt/$boot_vol"
btrfs subvolume create "$boot_vol"
cd /
umount /mnt
## end create subvols ##

# NOTE(review): dev does not appear to be read again before the next
# loop reassigns it - kept as is, confirm before removing.
dev=${boot_devs[0]}
# Record in the grub environment block that the fai check was done and
# which boot subvolume was installed last.
mount "$first_grub_extdev" /mnt
grub-editenv /mnt/grubenv set did_fai_check=true
grub-editenv /mnt/grubenv set "last_boot=/$boot_vol"
umount /mnt
410
# Write the fstab, crypttab and disk_var.sh files into /tmp/fai.
# Heredoc bodies and terminators must stay at column 0.
if [[ $DISTRO == debianstable_bootstrap ]]; then
  cat > /tmp/fai/fstab <<EOF
$first_boot_dev / btrfs noatime,subvol=$boot_vol 0 0
EOF
  cat >/tmp/fai/disk_var.sh <<EOF
BOOT_DEVICE="${short_devs[*]}"
ROOT_PARTITION=$first_boot_dev
EOF
else
  # note, the mount point /a seems to get automatically created somewhere
  cat > /tmp/fai/fstab <<EOF
$first_root_crypt / btrfs noatime,subvol=root_$DISTRO 0 0
$first_root_crypt /q btrfs noatime,subvol=q 0 0
/q/a /a none bind 0 0
$first_boot_dev /boot btrfs noatime,subvol=$boot_vol 0 0
EOF

  swaps=()
  # Start from an empty crypttab: the loop below only appends, so a
  # second run would otherwise duplicate every entry.
  : > /tmp/fai/crypttab
  for dev in "${devs[@]}"; do
    swaps+=("$(swap-cryptname)")
    cat >>/tmp/fai/crypttab <<EOF
$(root-cryptname) $(rootdev) none keyscript=/root/keyscript,discard,luks
$(swap-cryptname) $(swapdev) /dev/urandom swap,cipher=aes-xts-plain64,size=256,hash=ripemd160
EOF
    cat >> /tmp/fai/fstab <<EOF
$(swap-cryptdev) none swap sw 0 0
EOF
  done

  # fai would do this:
  #BOOT_DEVICE=\${BOOT_DEVICE:-"${devs[0]}"}

  # note: swaplist seems to do nothing.
  cat >/tmp/fai/disk_var.sh <<EOF
BOOT_DEVICE="${short_devs[*]}"
BOOT_PARTITION=\${BOOT_PARTITION:-$first_boot_dev}
# ROOT_PARTITIONS is added by me, used in arch setup.
ROOT_PARTITIONS="${root_devs[*]}"
ROOT_PARTITION=\${ROOT_PARTITION:-$first_root_crypt}
SWAPLIST=\${SWAPLIST:-"${swaps[*]}"}
EOF
fi