add arg to just gen files, refactor, cleanup
[automated-distro-installer] / fai / config / hooks / partition.DEFAULT
1 #!/bin/bash -x
2 # Copyright (C) 2016 Ian Kelling
3
4 # This program is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU General Public License
6 # as published by the Free Software Foundation; either version 2
7 # of the License, or (at your option) any later version.
8
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17
# Prefix every `set -x` trace line with the script line number.
PS4='+ $LINENO '
# -e: exit on an unhandled failing command; -E: functions, command
# substitutions and subshells inherit the ERR trap; pipefail: a pipeline
# fails if any of its stages fails.
set -eE -o pipefail
# On error, report the script, line, failing command and its status on stderr.
trap 'echo "$0:$LINENO:error: \"$BASH_COMMAND\" returned $?" >&2' ERR

# This script must be run as root; refuse to continue otherwise.
if [[ $EUID != 0 ]]; then
  echo "$0: error: need to run as root" >&2
  exit 1
fi
26
27 # for calling outside of FAI without args:
28 # fai-redep
29 #
30 # source /b/fai/fai-wrapper
31 # - set any appropriate classes with: fai-setclass OPT1... which sets CLASS_OPT1=true...
32 # or run eval-fai-classfile FILE.
33 # - Set a VOL_DISTROVER (if not doing mkroot2) eg:
34 # fai-setclass VOL_NABIA
35 #
36 # ARGS (only 1 is valid):
37 #
38 # mkroot2: for running outside of fai and setting up the root2/boot2 luks and btrfs and tab files
39 #
40 # mkroot2tab: for running outside of fai and setting up the root2/boot2 tab files, in case luks and btrfs
41 # happen to already be setup.
42 #
43 # mktab: for running outside of fai and generating a crypttab for
44 # the main root fs in /tmp/fai. Must run with env var, eg export DISTRO=trisquelnabia.
45 #
46 # Example use in a bootstrap distro:
47 # scp /a/bin/fai/fai/config/{distro-install-common/devbyid,hooks/partition.DEFAULT} root@HOST:
48 # sl HOST
49 # export DISTRO=trisquelnabia; ./partition.DEFAULT mktab
50 # ## cryptsetup wont take within a pipeline
51 # mapfile -t lines < <(awk '! /swap/ {print $2,$1}' /tmp/fai/crypttab )
52 # for l in "${lines[@]}"; do cryptsetup luksOpen $l; done
53 #
54 # # or alternatively, to avoid typing it many times:
55 # read -r lukspw; for l in "${lines[@]}"; do yes "$lukspw" | cryptsetup luksOpen $l; done
56
57 ## potentially useful later:
58 # sed 's#/root/keyscript,#decrypt_keyctl,#;s/$/,noauto/' /tmp/fai/crypttab >/etc/crypttab
59 #
60 # environment variables:
61 #
62 # HOSTNAME: if frodo, we exclude 2 devices from the /boot array, which
63 # the bios does not see. if demohost, we set the luks password to just
64 # 'x'.
65 #
66 # SPECIAL_DISK: For use outside of fai. A base disk name like
67 # /dev/sdk. If set, we just cryptsetup and partition this one disk then
68 # exit. This is useful for partitioning a disk in preparation to replace
69 # a failed or failing disk from a raid10 array.
70 #
71 # classes:
72 #
73 # REPARTITION: forces repartitioning even if we detect the proper amount
74 # of partitions already exist.
75 #
76 # NOWIPE: use existing subvolumes if they exist
77 #
# ROTATIONAL: forces installing onto hdds instead of ssds. Normally ssds
# are chosen if they exist.
80 #
81 # PARTITION_PROMPT: command line prompt before partitioning
82 #
83 # RAID0: forces raid0 filesystem. Normally with 4+ devices, we use
84 # raid10.
85 # RAID1: forces raid1 filesystem.
86
# Operating mode flags. All stay false for a regular run; at most one is
# switched on by the single optional positional argument.
mktab=false
mkroot2=false
mkroot2tab=false
if [[ $1 ]]; then
  ## duplicates fai-wrapper, for convenience of not needing it
  if ! type -t ifclass &>/dev/null; then
    # ifclass NAME: succeeds when NAME matches $HOSTNAME or when the
    # variable CLASS_NAME is non-empty.
    ifclass() {
      local class_var=CLASS_$1
      [[ $HOSTNAME == $1 || ${!class_var} ]]
    }
  fi

  case $1 in
    mkroot2 | mkroot2tab | mktab)
      # Each valid argument is also the name of the flag it enables.
      printf -v "$1" true
      ;;
    *)
      echo "$0: error: unsupported arg: $1" >&2
      exit 1
      ;;
  esac
fi
115
116
# Preparing a single disk (SPECIAL_DISK) always implies repartitioning it.
[[ -z $SPECIAL_DISK ]] || export CLASS_REPARTITION=true

# # fai's setup-storage won't do btrfs on luks,
# # so we do it ourself :)
# inspiration taken from files in fai-setup-storage package

# Outside of fai, skiptask is not defined; in that case just carry on.
skiptask partition || ! type skiptask
127
# Make a `devbyid` command available. If no devbyid executable is on PATH,
# search a few known directories for the script and wrap it in a function
# of the same name. Exits with an error if it cannot be found.
if ! type -p devbyid; then
  # Quoted so that directories containing whitespace are tried as one path.
  # An empty $FAI simply yields candidates that fail the -d / -e checks.
  for d in "$FAI/distro-install-common" \
    /a/bin/fai/fai/config/distro-install-common "$FAI" "$PWD"; do
    [[ -d $d ]] || continue
    if [[ -e $d/devbyid ]]; then
      devbyid=$d/devbyid
      devbyid() { "$devbyid" "$@"; }
      break
    fi
  done
  if [[ ! $devbyid ]]; then
    echo "$0: error: failed to find devbyid script" >&2
    exit 1
  fi
fi
143
144
145
146 #### begin configuration
147
148 # this is the ordering of the /dev/sdaX, but
149 # the ordering of the partition layout goes like this:
150 # bios_grub
151 # grub_ext
152 # efi
153 # root
154 # swap
155 # boot
156
157 rootn=1
158 root2n=2
159 swapn=3
160 bootn=4
161 boot2n=5
162 efin=6
163 # ext partition so grub can write persistent variables,
164 # so it can do a one time boot. grub can't write to
165 # btrfs or any cow fs because it's more
166 # more complicated to do and they don't want to.
167 grub_extn=7
168 # bios boot partition,
169 # https://wiki.archlinux.org/index.php/GRUB
170 bios_grubn=8
171 even_bign=9
172 lastn=$bios_grubn
173
174
175
176 ##### end configuration
177
178 ##### begin function defs
179
# add-part [DEV] PARTNUM
#
# Print the path of partition PARTNUM on DEV by appending "-partPARTNUM".
# With a single argument, DEV defaults to the global $dev.
add-part() {
  local d part
  if [[ $# == 1 ]]; then
    d=$dev
    part=$1
  else
    d=$1
    part=$2
  fi
  # Quoted printf so the path is emitted verbatim (no word splitting or
  # globbing, and no echo option parsing).
  printf '%s\n' "$d-part$part"
}
191
# Each *dev helper prints the path of one specific partition by calling
# add-part with the matching partition number.
#
# rootdev and root2dev accept an optional DEV argument; without it,
# add-part falls back to the global $dev.
rootdev() { add-part "$@" "$rootn"; }
root2dev() { add-part "$@" "$root2n"; }

# note, the following helpers could all take "$@" like the two above.
# But it is not added since it is not used and shellcheck
# rightly says args never used should not exist.
##swapdev() { add-part "$@" $swapn; }

swapdev() { add-part "$swapn"; }
bootdev() { add-part "$bootn"; }
boot2dev() { add-part "$boot2n"; }
efidev() { add-part "$efin"; }
grub_extdev() { add-part "$grub_extn"; }
bios_grubdev() { add-part "$bios_grubn"; }
even_bigdev() { add-part "$even_bign"; }
207
# crypt-name PATH: print the device-mapper name used for PATH, which is
# "crypt_dev_" followed by the last path component of PATH.
crypt-name() { printf '%s\n' "crypt_dev_${1##*/}"; }
# crypt-dev PATH: print the /dev/mapper path of that mapping.
crypt-dev() { printf '%s\n' "/dev/mapper/crypt_dev_${1##*/}"; }

# Mapper path of the root / root2 partition of DEV (default: global $dev).
root-cryptdev() { crypt-dev "$(rootdev "$@")"; }
root2-cryptdev() { crypt-dev "$(root2dev "$@")"; }

# I omit a possible parameter since it is unused:
##swap-cryptdev() { crypt-dev "$(swapdev "$@")"; }
swap-cryptdev() { crypt-dev "$(swapdev)"; }

# Mapper names for the partitions of the global $dev.
root-cryptname() { crypt-name "$(rootdev)"; }
root2-cryptname() { crypt-name "$(root2dev)"; }
swap-cryptname() { crypt-name "$(swapdev)"; }
219
# dev-mib [DEV]
#
# Print the size of DEV (default: global $dev) in MiB, minus 1, as parsed
# from the machine-readable `parted -m ... unit MiB print` output line that
# starts with a /dev/ path.
dev-mib() {
  local d=${1:-$dev}
  # The device path is quoted so it reaches parted as a single argument.
  echo $(( $(parted -m "$d" unit MiB print |
    sed -nr "s#^/dev/[^:]+:([0-9]+).*#\1#p") - 1 ))
}
225
# luks-setup DEVICE
#
# Format DEVICE as a luks volume keyed by the key file $luks_file, add the
# passphrase $lukspw as a second key, then open it under the mapper name
# given by crypt-name. Reads the globals $luks_file and $lukspw.
luks-setup() {
  local luksdev="$1"
  # Historical note: an older version passed --type luks1 for trisquel 9;
  # no --type is passed now. We can also check on
  # cryptsetup --help | less /compil
  # to see about the other settings. Default in debian 9 is luks2.
  # You can convert from luks2 to luks 1 by adding a temporary key:
  # cryptsetup luksAddKey --pbkdf pbkdf2
  # then remove the new format keys with cryptsetup luksRemoveKey
  # then cryptsetup convert DEV --type luks1, then readd old keys and remove temp.
  #
  # `yes` is killed by SIGPIPE once cryptsetup stops reading, which with
  # pipefail makes the pipeline return 141; that status is accepted.
  yes YES | cryptsetup luksFormat "$luksdev" "$luks_file" || [[ $? == 141 ]]
  yes "$lukspw" |
    cryptsetup luksAddKey --key-file "$luks_file" \
      "$luksdev" || [[ $? == 141 ]]
  # background: Keyfile and password are treated just
  # like 2 ways to input a passphrase, so we don't actually need to have
  # different contents of keyfile and passphrase, but it makes some
  # security sense to use a really big randomly generated passphrase
  # as much as possible, so we have both.
  #
  # This would remove the keyfile.
  # yes 'test' | cryptsetup luksRemoveKey /dev/... \
  #   /key/file || [[ $? == 141 ]]
  cryptsetup luksOpen "$luksdev" "$(crypt-name "$luksdev")" --key-file "$luks_file"
}
250
251 ##### end function defs
252
253
254 ##### begin variable setup
# partition: repartition (full wipe) only when the REPARTITION class is set.
partition=false # change to true to force a full wipe
if ifclass REPARTITION; then
  partition=true # force a full wipe
fi

# wipe: recreate subvolumes unless the NOWIPE class asks to keep them.
wipe=true
if ifclass NOWIPE; then
  wipe=false
fi

# Extra btrfs mount options: compress only with more than 2 processors.
if (( $(nproc) > 2 )); then
  mopts=,compress=zstd
fi
269
# disk_excludes[KNAME]=true for every disk that has something mounted, so
# the disk detection can skip it. Skipped entirely in the mkroot2,
# mkroot2tab and mktab modes and when the USE_MOUNTED class is set.
declare -A disk_excludes
# Each condition needs its own &&: without it, "! ifclass USE_MOUNTED"
# would merely be passed as arguments to the true/false in $mktab.
if ! $mkroot2 && ! $mkroot2tab && ! $mktab && ! ifclass USE_MOUNTED; then
  ## ignore disks that are mounted, eg when running from fai-cd
  while read -r l; do
    # lsblk -P prints KEY="value" pairs, evaluated here as shell
    # assignments to KNAME, MOUNTPOINT and PKNAME.
    eval "$l"
    # A device without a parent is itself the disk.
    if [[ ! $PKNAME ]]; then
      # shellcheck disable=SC2153 # not a misspelling
      PKNAME="$KNAME"
    fi
    if [[ $MOUNTPOINT ]]; then
      disk_excludes[$PKNAME]=true
    fi
  done < <(lsblk -nP -o KNAME,MOUNTPOINT,PKNAME)
fi
284
# Sort the sata/nvme disks that are not excluded into hdds (rotational)
# and ssds (non-rotational), then pick the set to install onto.
hdds=()
ssds=()
# this excludes usb. note: i may encounter some other type in the future.
for disk in $(lsblk -do name,tran -n | awk '$2 ~ "^(sata|nvme)$" { print $1 }'); do
  if [[ ${disk_excludes[$disk]} ]]; then
    continue
  fi
  case $(cat "/sys/block/$disk/queue/rotational") in
    0) ssds+=("/dev/$disk") ;;
    1) hdds+=("/dev/$disk") ;;
    *)
      # Report the value from the same sysfs file that was just tested,
      # and report it on stderr like the other errors.
      echo "$0: error: unknown /sys/block/$disk/queue/rotational: $(cat "/sys/block/$disk/queue/rotational")" >&2
      exit 1
      ;;
  esac
done

# install all ssds, or if there are none, all hdds.
# Note, usb flash disks are seen as rotational, which is
# very odd, but convenient for ignoring them here.
# TODO: find a reliable way to ignore them.
if ! ifclass ROTATIONAL && (( ${#ssds[@]} > 0 )); then
  short_devs=("${ssds[@]}")
else
  short_devs=("${hdds[@]}")
fi
309
# Decide whether repartitioning is needed: unless it is already forced,
# count the device nodes whose name is the disk path plus at least one more
# character (its partitions) and repartition if there are fewer than lastn.
#blkid="$(blkid -s TYPE)"
for dev in ${short_devs[@]}; do
  if $partition; then break; fi
  y=$(readlink -f $dev)
  # shellcheck disable=SC2206 # globbing is intended
  arr=($y?*)
  if (( ${#arr[@]} < lastn )); then
    partition=true
  fi
  # On one system, blkid is missing some partitions.
  # maybe we need a flag, like FUZZY_BLKID or something, so we
  # can check that at least some exist.
  # for x in "`rootdev`: TYPE=\"crypto_LUKS\"" "`bootdev`: TYPE=\"btrfs\""; do
  # echo "$blkid" | grep -Fx "$x" &>/dev/null || partition=true
  # done
done

# With the PARTITION_PROMPT class, wait for the user to press enter before
# anything destructive happens.
if $partition && ifclass PARTITION_PROMPT; then
  echo "Press any key except ctrl-c to continue and partition these drives:"
  echo " ${short_devs[*]}"
  read -r
fi
333
# devs: the selected disks translated by devbyid, one entry per entry of
# short_devs. Abort if that yields nothing.
devs=()
shopt -s extglob
for short_dev in ${short_devs[@]}; do
  devs+=("$(devbyid $short_dev)")
done
if [[ ! ${devs[0]} ]]; then
  echo "$0: error: failed to detect devs" >&2
  exit 1
fi
343
# Collect the boot / boot2 partitions of all disks that take part in the
# /boot filesystems, add up the disk sizes into boot_space (MiB), and
# remember the efi and grub_ext partitions of the first disk that
# contributes a boot partition.
boot_space=0
first=true
boot_devs=()
boot2_devs=()
for dev in "${devs[@]}"; do
  if ifclass frodo; then
    # I ran into a machine where the bios doesn't know about some disks,
    # so 1st stage of grub also doesn't know about them.
    # Also, grub does not support mounting degraded btrfs as far as
    # I can tell with some googling.
    # From within an arch install env, I could detect them by noting
    # their partitions were mixed with the next disk in /dev/disk/by-path,
    # and I have mixed model disks, and I could see the 8 models which showed
    # up in the bios, and thus see which 2 models were missing.
    # hdparm -I /dev/sdh will give model info in linux.
    # However, in fai on jessie, /dev/disk/by-path dir doesn't exist,
    # and I don't see another way, so I'm hardcoding them.
    # We still put grub on them and partition them the same, for uniformity
    # and in case they get moved to a system that can recognize them,
    # we just exclude them from the boot filesystem.
    #
    # NOTE(review): boot_space is not increased in this branch - confirm
    # that is intended.
    cd /dev/disk/by-id/
    bad_disk=false
    for id in ata-TOSHIBA_MD04ACA500_8539K4TQFS9A \
      ata-TOSHIBA_MD04ACA500_Y5IFK6IJFS9A; do
      if [[ $(readlink -f "$id") == "$(readlink -f "$dev")" ]]; then
        bad_disk=true
        break
      fi
    done
    if ! $bad_disk; then
      boot_devs+=("$(bootdev)")
      boot2_devs+=("$(boot2dev)")
    fi
  else
    boot_space=$(( boot_space + $(parted -m "$dev" unit MiB print |
      sed -nr "s#^/dev/[^:]+:([0-9]+).*#\1#p") - 1 ))
    boot_devs+=("$(bootdev)")
    boot2_devs+=("$(boot2dev)")
  fi
  # ${#boot_devs[@]} is the element count. Expanding the elements
  # themselves here would put device paths into the arithmetic, which
  # fails, so first_efi / first_grub_extdev would never be set.
  if $first && (( ${#boot_devs[@]} >= 1 )); then
    first_efi=$(efidev)
    first_grub_extdev=$(grub_extdev)
    first=false
  fi
done
first_boot_dev=${boot_devs[0]}
390
# Pick the btrfs raid level from the number of boot devices, unless a
# RAID0 / RAID1 class forces it:
#   RAID0 class or exactly 1 device      -> raid0
#   RAID1 class or at most 3 devices     -> raid1
#   anything else                        -> raid10
# even_raid is true only for raid1 with exactly 2 devices.
even_raid=false
raid_level=10
if ifclass RAID0 || (( ${#boot_devs[@]} == 1 )); then
  raid_level=0
elif ifclass RAID1 || (( ${#boot_devs[@]} <= 3 )); then
  raid_level=1
  if (( ${#boot_devs[@]} == 2 )); then
    even_raid=true
  fi
fi
402
403
404
### Begin calculate boot partition space
# Raid levels starting with 1 (raid1, raid10) duplicate data, so only half
# of the raw space is usable.
if [[ $raid_level == 1* ]]; then
  boot_space=$(( boot_space / 2 ))
fi

# Total sizes in MiB of the boot, root2 and boot2 filesystems, chosen by
# the amount of available space.
if (( boot_space <= 30000 )); then
  # Small vms don't have room for /boot recovery. With 3 kernels
  # installed, i'm using 132M on t8, so this seems like plenty of
  # room. note: rhel 8 recommends 1g for /boot. u20.04 with 3 kernels =
  # 308 mb, so things have grown significantly
  boot_mib=1000
  root2_mib=100
  boot2_mib=100
elif (( boot_space <= 60000 )); then
  boot_mib=$(( 5000 + (boot_space - 30000) / 2 ))
  root2_mib=100
  boot2_mib=100
else
  # this is larger than needed for several /boot subvols,
  # because I keep a minimal debian install on it for
  # recovery needs and for doing pxe-kexec.
  # NOTE(review): root2_mib is far larger than the 60000 threshold that
  # selects this branch - confirm it fits on the disks this is used with.
  boot_mib=10000
  root2_mib=1000000
  boot2_mib=5000
fi

# With duplicating raid levels, twice the raw space is needed for the same
# usable size.
if [[ $raid_level == 1* ]]; then
  boot_mib=$(( boot_mib * 2 ))
  boot2_mib=$(( boot2_mib * 2 ))
  root2_mib=$(( root2_mib * 2 ))
fi
### end calculate boot partition space
438
# bpart DEVICE...
#
# Create one btrfs filesystem across the given devices, using the data and
# metadata profile that matches the global $raid_level (0, 1 or 10).
# Returns non-zero without touching anything for any other raid_level.
bpart() { # btrfs a partition
  case $raid_level in
    0) mkfs.btrfs -f "$@" ;;
    1) mkfs.btrfs -f -m raid1 -d raid1 "$@" ;;
    10) mkfs.btrfs -f -m raid10 -d raid10 "$@" ;;
    *)
      # Silently creating no filesystem would only fail later at mount time.
      echo "$0: error: unsupported raid_level: $raid_level" >&2
      return 1
      ;;
  esac
}
446
447
# Derive DISTRO from the first VOL_* class that is set, unless DISTRO was
# already given. Not finding one is only acceptable in the mkroot2 and
# mkroot2tab modes.
if [[ ! $DISTRO ]]; then
  # CLASS_SUFFIX=distro pairs, tried in this order.
  for _vol_distro in \
    BULLSEYE_BOOTSTRAP=debianbullseye_bootstrap \
    STRETCH=debianstretch \
    BUSTER=debianbuster \
    BULLSEYE=debianbullseye \
    BOOKWORM=debianbookworm \
    TESTING=debiantesting \
    XENIAL=ubuntuxenial \
    BIONIC=ubuntubionic \
    FOCAL=ubuntufocal \
    FLIDAS=trisquelflidas \
    ETIONA=trisqueletiona \
    NABIA=trisquelnabia; do
    if ifclass "VOL_${_vol_distro%%=*}"; then
      DISTRO=${_vol_distro#*=}
      break
    fi
  done
  unset _vol_distro
  if [[ ! $DISTRO ]] && ! $mkroot2 && ! $mkroot2tab; then
    echo "PARTITIONER ERROR: no distro class/var set" >&2
    exit 1
  fi
fi

# Name of the /boot subvolume for this distro.
case $DISTRO in
  debianbullseye_bootstrap)
    # this is just convenience for the libreboot_grub config
    # so we can glob the other ones easier.
    boot_vol=$DISTRO
    ;;
  *)
    boot_vol=boot_$DISTRO
    ;;
esac
488
489
# Mapper device of the root partition on the first disk.
first_root_crypt=$(root-cryptdev ${devs[0]})

# Swap size per disk in MiB: MemTotal from /proc/meminfo times 3/2,
# divided by the number of disks, divided by 1024.
# 1.5 x based on https://access.redhat.com/documentation/en-US/Red_Hat_Enterprise_Linux/7/html/Installation_Guide/sect-disk-partitioning-setup-x86.html#sect-custom-partitioning-x86
swap_mib=$(( $(awk '/^MemTotal:/ {print $2}' /proc/meminfo) * 3/(${#devs[@]} * 2 ) / 1024 ))

# Root partition of every disk.
root_devs=()
for dev in ${devs[@]}; do root_devs+=("$(rootdev)"); done
shopt -s nullglob
501 ##### end variable setup
502
503
504
# mktab
#
# Generate /tmp/fai/fstab and /tmp/fai/disk_var.sh, and for every distro
# except debianbullseye_bootstrap also /tmp/fai/crypttab, from the device
# variables computed earlier (devs, short_devs, boot_devs, root_devs,
# first_boot_dev, first_efi, first_root_crypt, boot_vol, mopts, DISTRO).
# Sets the globals dev, fstabstd and swaps as a side effect.
mktab() {
  mkdir -p /tmp/fai
  dev=${boot_devs[0]}
  fstabstd="x-systemd.device-timeout=30s,x-systemd.mount-timeout=30s"

  if [[ $DISTRO == debianbullseye_bootstrap ]]; then
    # The bootstrap distro lives directly on the unencrypted boot filesystem.
    cat > /tmp/fai/fstab <<EOF
$first_boot_dev / btrfs noatime,subvol=$boot_vol 0 0
$first_efi /boot/efi vfat nofail,$fstabstd 0 0
EOF
    cat >/tmp/fai/disk_var.sh <<EOF
BOOT_DEVICE="${short_devs[@]}"
ROOT_PARTITION=$first_boot_dev
EOF
  else
    # note, fai creates the mountpoints listed here
    cat > /tmp/fai/fstab <<EOF
$first_root_crypt / btrfs $fstabstd,noatime,subvol=root_$DISTRO$mopts 0 0
$first_root_crypt /mnt/root btrfs nofail,$fstabstd,noatime,subvolid=0$mopts 0 0
$first_boot_dev /boot btrfs nofail,$fstabstd,noatime,subvol=$boot_vol 0 0
$first_efi /boot/efi vfat nofail,$fstabstd 0 0
$first_boot_dev /mnt/boot btrfs nofail,$fstabstd,noatime,subvolid=0 0 0
EOF
    swaps=()
    rm -f /tmp/fai/crypttab
    # One root and one swap crypttab entry, plus a swap fstab entry, per disk.
    for dev in "${devs[@]}"; do
      swaps+=("$(swap-cryptname)")
      cat >>/tmp/fai/crypttab <<EOF
$(root-cryptname) $(rootdev) none keyscript=/root/keyscript,discard,luks,initramfs
$(swap-cryptname) $(swapdev) /dev/urandom swap,cipher=aes-xts-plain64,size=256,hash=ripemd160
EOF
      cat >> /tmp/fai/fstab <<EOF
$(swap-cryptdev) none swap nofail,$fstabstd,sw 0 0
EOF
    done

    # fai would do this:
    #BOOT_DEVICE=\${BOOT_DEVICE:-"${devs[0]}"}

    # note: swaplist seems to do nothing.
    cat >/tmp/fai/disk_var.sh <<EOF
BOOT_DEVICE="${short_devs[@]}"
BOOT_PARTITION=\${BOOT_PARTITION:-$first_boot_dev}
# ROOT_PARTITIONS is added by me, used in arch setup.
ROOT_PARTITIONS="${root_devs[@]}"
ROOT_PARTITION=\${ROOT_PARTITION:-$first_root_crypt}
SWAPLIST=\${SWAPLIST:-"${swaps[@]}"}
EOF

    if [[ $HOSTNAME == kd ]]; then
      # note, having these with keyscript and initramfs causes a luks error in fai.log,
      # but it is safely ignorable and gets us the ability to just type our password
      # in once at boot. A downside is that they are probably needed to be plugged in to boot.
      #
      # The first entry's source device uses the same partition number
      # (even_bign) as its mapping name and its fstab line; a literal
      # -part7 would point at the grub_ext partition.
      cat >>/tmp/fai/crypttab <<EOF
crypt_dev_ata-Samsung_SSD_870_QVO_8TB_S5VUNG0N900656V-part${even_bign} /dev/disk/by-id/ata-Samsung_SSD_870_QVO_8TB_S5VUNG0N900656V-part${even_bign} none keyscript=decrypt_keyctl,discard,luks,initramfs
crypt_dev_ata-TOSHIBA_MD04ACA500_84R2K773FS9A-part1 /dev/disk/by-id/ata-TOSHIBA_MD04ACA500_84R2K773FS9A-part1 none keyscript=decrypt_keyctl,discard,luks,initramfs
crypt_dev_ata-ST6000DM001-1XY17Z_Z4D29EBL-part1 /dev/disk/by-id/ata-ST6000DM001-1XY17Z_Z4D29EBL-part1 none keyscript=decrypt_keyctl,discard,luks,initramfs
EOF
      cat >> /tmp/fai/fstab <<EOF
# r7 = root partition7. it isnt actually #7 anymore, not a great name, but whatever
/dev/mapper/crypt_dev_ata-Samsung_SSD_870_QVO_8TB_S5VUNG0N900656V-part${even_bign} /mnt/r7 btrfs nofail,$fstabstd,noatime,compress=zstd,subvolid=0 0 0
/dev/mapper/crypt_dev_ata-TOSHIBA_MD04ACA500_84R2K773FS9A-part1 /mnt/rust1 btrfs nofail,$fstabstd,noatime,compress=zstd,subvolid=0 0 0
/dev/mapper/crypt_dev_ata-ST6000DM001-1XY17Z_Z4D29EBL-part1 /mnt/rust2 btrfs nofail,$fstabstd,noatime,compress=zstd,subvolid=0 0 0
EOF
    fi
  fi
}
572
573
574
# getluks
#
# Set the globals used for luks setup:
#   luks_dir  - directory holding key files and passphrases (kept if
#               already set)
#   luks_file - key file: host-$HOSTNAME, or the only host-* file if there
#               is exactly one
#   lukspw    - passphrase: "x" for class demohost, else the contents of
#               $luks_dir/$HOSTNAME, else of $luks_dir/iank
# In mkroot2 mode the key file and passphrase of "amy" are used instead.
# Exits with an error if no directory or key file can be found.
getluks() {
  if [[ ! $luks_dir ]]; then
    # see README for docs about how to create these
    luks_dir=$FAI/distro-install-common/luks
    if [[ ! -d $luks_dir ]]; then
      luks_dir=/q/root/luks
    fi
    if [[ ! -d $luks_dir ]]; then
      echo "$0: error: no luks_dir found" >&2
      exit 1
    fi
  fi

  luks_file=$luks_dir/host-$HOSTNAME
  if [[ ! -e $luks_file ]]; then
    # Only the glob is left unquoted; the directory part is quoted so a
    # path with whitespace is not split.
    hostkeys=("$luks_dir"/host-*)
    # if there is only one key, we might be deploying somewhere
    # where dhcp doesnt give us a proper hostname, so use that.
    if [[ ${#hostkeys[@]} == 1 && -e ${hostkeys[0]} ]]; then
      luks_file=${hostkeys[0]}
    else
      echo "$0: error: no key for hostname at $luks_file" >&2
      exit 1
    fi
  fi

  # # note, corresponding changes in /b/ds/keyscript-{on,off}
  if ifclass demohost; then
    lukspw=x
  elif [[ -e $luks_dir/$HOSTNAME ]]; then
    lukspw=$(cat "$luks_dir/$HOSTNAME")
  else
    lukspw=$(cat "$luks_dir/iank")
  fi

  if $mkroot2; then
    luks_file=$luks_dir/host-amy
    lukspw=$(cat "$luks_dir/amy")
  fi
}
616
617
618 #### root2 non-fai run
# doroot2
#
# Handle the mkroot2 / mkroot2tab modes, then exit 0. Writes
# /mnt/root/root2-crypttab and /mnt/root/root2-fstab for the root2/boot2
# filesystems; in mkroot2 mode it first sets up luks on every root2
# partition and creates the root2 and boot2 btrfs filesystems.
# Refuses to run (exit 1) when repartitioning is pending.
doroot2() {
  # Check before touching anything, and report on stderr like other errors.
  if $partition; then
    echo "$0: error: found partition=true but have mkroot2 arg" >&2
    exit 1
  fi

  # We write to these files instead of just /etc/fstab, /etc/crypttab,
  # because these are filesystems created after our current root, and so
  # this allows us to update other root filesystems too.
  rm -f /mnt/root/root2-{fs,crypt}tab
  for dev in "${devs[@]}"; do
    if $mkroot2; then
      luks-setup "$(root2dev)"
    fi
    cat >>/mnt/root/root2-crypttab <<EOF
$(root2-cryptname) $(root2dev) $luks_file discard,luks,initramfs
EOF
  done
  if $mkroot2; then
    # shellcheck disable=SC2046 # one word per device is intended
    bpart $(for dev in "${devs[@]}"; do root2-cryptdev; done)
    bpart "${boot2_devs[@]}"
  fi
  mkdir -p /mnt/root2 /mnt/boot2
  cat >>/mnt/root/root2-fstab <<EOF
$(root2-cryptdev ${devs[0]}) /mnt/root2 btrfs nofail,x-systemd.device-timeout=30s,x-systemd.mount-timeout=30s,noatime,subvolid=0$mopts 0 0
${boot2_devs[0]} /mnt/boot2 btrfs nofail,x-systemd.device-timeout=30s,x-systemd.mount-timeout=30s,noatime,subvolid=0 0 0
EOF
  exit 0
}
648
# Mode dispatch:
#   mkroot2 / mkroot2tab: load luks settings, run doroot2 (which exits).
#   mktab:                only generate the tab files, then exit.
#   default:              generate the tab files, load luks settings and
#                         continue with partitioning below.
if $mkroot2 || $mkroot2tab; then
  getluks
  doroot2
else
  mktab
  if $mktab; then
    exit 0
  fi
  getluks
fi
659
660
# Either repartition and format every disk (partition=true), or just open
# the existing luks root partitions so the filesystems can be reused.
if $partition; then
  ### begin wipefs
  if [[ ! $SPECIAL_DISK ]]; then
    for dev in ${devs[@]}; do
      # if we repartition to the same as an old partition,
      # we don't want any old fses hanging around.
      for (( i=1; i <= lastn; i++ )); do
        x=$(add-part $i)
        [[ -e $x ]] || continue
        # Retry up to 10 times, 2 seconds apart, then give up.
        count_down=10
        # wipefs has failed, manual run works, google suggests timing issue
        while ! wipefs -a $x; do
          sleep 2
          count_down=$((count_down - 1))
          (( count_down > 0 )) || exit 1
        done
      done
    done
  fi
  ### end wipefs


  # When we have 2 disks of at least 100g difference in size,
  # make an extra partition on the end of the bigger one.
  even_big_part=false
  even_diff_min=100000
  if $even_raid; then
    # Work out which of the two disks is the smaller one. Its size becomes
    # disk_mib, the common size used for the regular partitions.
    smalli=0
    bigi=1
    if (( $(dev-mib ${devs[0]}) >= $(dev-mib ${devs[1]}) )); then
      smalli=1
      bigi=0
    fi
    disk_mib=$(dev-mib ${devs[smalli]})
    even_big_dev=${devs[bigi]}
    even_big_mib=$(dev-mib $even_big_dev)
    if (( even_big_mib - disk_mib > even_diff_min )); then
      even_big_part=true
    fi
  fi

  for dev in ${devs[@]}; do
    # In SPECIAL_DISK mode only that one disk is partitioned; the loop
    # exits at the end of its first iteration.
    if [[ $SPECIAL_DISK ]]; then
      dev=$(devbyid $SPECIAL_DISK)
    fi

    # parted will round up the disk size. Do -1 so we can have
    # fully 1MiB unit partitions for easy resizing of the last partition.
    # Otherwise we would pass in -0 for the end argument for the last partition.
    #
    # Note: parted print error output is expected. example:
    # Error: /dev/vda: unrecognised disk label
    if ! $even_raid; then
      disk_mib=$(dev-mib)
    fi

    # Per-disk share of each filesystem, and the resulting partition end
    # offsets. root takes whatever is left after the fixed-size partitions.
    boot_part_mib=$(( boot_mib / ${#boot_devs[@]} ))
    boot2_part_mib=$(( boot2_mib / ${#boot_devs[@]} ))
    root2_part_mib=$(( root2_mib / ${#root_devs[@]} ))
    root_end=$(( disk_mib - root2_part_mib - swap_mib - boot_part_mib - boot2_part_mib ))
    root2_end=$(( root_end + root2_part_mib ))
    swap_end=$(( root2_end + swap_mib ))
    boot_end=$(( swap_end + boot_part_mib ))

    parted -s $dev mklabel gpt
    # MiB because parted complains about alignment otherwise.
    pcmd="parted -a optimal -s -- $dev"
    # root partition, the main big one
    $pcmd mkpart primary ext3 524MiB ${root_end}MiB
    # without naming, systemd gives us misc errors like:
    # dev-disk-by\x2dpartlabel-primary.device: Dev dev-disk-by\x2dpartlabel-primary.device appeared twice
    $pcmd name $rootn root
    # root2 partition
    $pcmd mkpart primary ext3 ${root_end}MiB ${root2_end}MiB
    $pcmd name $root2n root2
    # normally a swap is type "linux-swap", but this is encrypted swap. using that
    # label will confuse systemd.
    # swap partition
    $pcmd mkpart primary "" ${root2_end}MiB ${swap_end}MiB
    $pcmd name $swapn swap
    # boot partition
    $pcmd mkpart primary "" ${swap_end}MiB ${boot_end}MiB
    $pcmd name $bootn boot
    # boot2 partition
    $pcmd mkpart primary "" ${boot_end}MiB ${disk_mib}MiB
    $pcmd name $boot2n boot2
    # uefi partition. efi sucks, half a gig, ridiculous.
    $pcmd mkpart primary "fat32" 12MiB 524MiB
    $pcmd name $efin efi
    $pcmd set $efin esp on
    # note, this is shown here: https://support.system76.com/articles/bootloader/
    # but not mentioned https://wiki.archlinux.org/index.php/EFI_system_partition
    # probably not needed
    $pcmd set $bootn boot on
    $pcmd set $boot2n boot on
    # i only need a few k, but googling min size,
    # I found someone saying that gparted
    # required at least 8 because of their hard drive cylinder size.
    # And 8 is still very tiny.
    # grub_ext partition
    $pcmd mkpart primary "ext2" 4MiB 12MiB
    $pcmd name $grub_extn grubext
    # gpt ubuntu cloud image uses ~4 mb for this partition. fai uses 1 MiB.
    # so, I use 3, whatever.
    # note: the parted manual says partitions on cheap flash media
    # should start at 4.
    # bios grub partition
    $pcmd mkpart primary "" 1MiB 4MiB
    $pcmd name $bios_grubn biosgrub
    $pcmd set $bios_grubn bios_grub on
    # Optional extra partition filling the rest of the bigger disk.
    if $even_big_part && [[ $dev == "$even_big_dev" ]]; then
      $pcmd mkpart primary ext3 ${disk_mib}MiB ${even_big_mib}MiB
      $pcmd name $even_bign even_big
    fi

    # the mkfs failed before on a vm, which prompted me to add
    # sleep .1
    # then it failed again on a physical machine
    # with:
    # Device /dev/disk/by-id/foo doesn't exist or access denied,
    # so I added a wait until it existed.
    # Then I added the mkfs.ext2, which claimed to succeed,
    # but then couldn't be found upon reboot. In that case we didn't
    # wait at all. So I've added a 3 second minimum wait.
    secs=0
    while [[ ! -e $(bios_grubdev) ]] && (( secs < 10 )); do
      sleep 1
      secs=$((secs +1))
    done
    sleep 3

    mkfs.fat -F32 $(efidev)

    if $even_big_part && [[ $dev == "$even_big_dev" ]]; then
      luks-setup $(even_bigdev)
      mkfs.btrfs -f $(crypt-dev $(even_bigdev))
    fi

    # Holds just a single file, rarely written, so
    # use ext2, like was often used for the /boot partition.
    # This exists because grub can only persist data to a non-cow fs.
    # And we use persisting a var in grub to do a one time boot.
    # We could pass the data on the kernel command line and persist it
    # to grubenv after booting, but that relies on the boot always succeeding.
    # This is just a bit more robust, and it could work for booting
    # into ipxe which can't persist data, if we ever got that working.
    mkfs.ext2 $(grub_extdev)
    luks-setup $(rootdev)

    if [[ $SPECIAL_DISK ]]; then
      exit 0
    fi
  done
  ls -la /dev/btrfs-control # this was probably for debugging...
  sleep 1
  # One btrfs filesystem across all encrypted root partitions, and one
  # across all boot partitions.
  bpart $(for dev in ${devs[@]}; do root-cryptdev; done)
  bpart ${boot_devs[@]}
else
  # Not repartitioning: open each luks root partition that is not open yet.
  for dev in ${devs[@]}; do
    if [[ -e /dev/mapper/$(root-cryptname) ]]; then
      continue
    fi
    cryptsetup luksOpen $(rootdev) $(root-cryptname) \
      --key-file $luks_file
  done
  sleep 1
fi
828
829
# Recreate the root subvolume root_$DISTRO on the encrypted root
# filesystem, unless wiping is disabled or this is the bootstrap distro.
if $wipe && [[ $DISTRO != debianbullseye_bootstrap ]]; then
  # bootstrap distro doesn't use separate encrypted root.
  mount -o subvolid=0 $first_root_crypt /mnt
  # systemd creates subvolumes we want to delete.
  # List the subvolumes below root_$DISTRO/, sorted by descending path,
  # and delete them before the root subvolume itself.
  mapfile -t s < <(btrfs subvolume list --sort=-path /mnt |
    sed -rn "s#^.*path\s*(root_$DISTRO/\S+)\s*\$#\1#p")
  for subvol in ${s[@]}; do btrfs subvolume delete /mnt/$subvol; done
  btrfs subvolume set-default 0 /mnt
  [[ ! -e /mnt/root_$DISTRO ]] || btrfs subvolume delete /mnt/root_$DISTRO

  ## create subvols ##
  cd /mnt

  btrfs subvolume create root_$DISTRO

  # could set default subvol like this, but no reason to.
  # btrfs subvolume set-default \
  # $(btrfs subvolume list . | grep "root_$DISTRO$" | awk '{print $2}') .

  # For raid systems, cow allows for error correction, for non-raid systems,
  # protects root fs from having the plug pulled. Reprovisioning a root
  # subvol is not my favorite thing to do.
  # # no cow on the root filesystem. its setup is fully scripted,
  # # if it's messed up, we will just recreate it,
  # # and we can get better perf with this.
  # # I can't remember exactly why, but this is preferable to mounting with
  # # -o nodatacow, I think because subvolumes inherit that.
  # chattr -Rf +C root_$DISTRO
  cd /
  umount /mnt
fi

# Boot filesystem: install the libreboot grub config and make sure the
# $boot_vol subvolume exists (recreated from scratch when wiping).
mount -o subvolid=0 $first_boot_dev /mnt
cd /mnt
btrfs subvolume set-default 0 /mnt # already default, just ensuring it.

# for libreboot systems. grub2 only reads from subvolid=0
mkdir -p /mnt/grub2
cp $FAI/distro-install-common/libreboot_grub.cfg /mnt/grub2

if $wipe && [[ -e /mnt/$boot_vol ]]; then
  btrfs subvolume delete /mnt/$boot_vol
fi
if [[ ! -e /mnt/$boot_vol ]]; then
  btrfs subvolume create $boot_vol
fi
cd /
umount /mnt
## end create subvols ##

# Record in the grub environment file on the grub_ext partition that this
# check ran and which boot subvolume was set up last.
mount $first_grub_extdev /mnt
grub-editenv /mnt/grubenv set did_fai_check=true
grub-editenv /mnt/grubenv set last_boot=/$boot_vol
umount /mnt
884
885
886 # initial setup of extra data fs, mounted,
887 # btrfs subvol create nocow
888 # chattr +C nocow
889 # chown iank.iank nocow
890