e953cf19a238381fb3b9c107396e5fe961043667
[automated-distro-installer] / fai / config / hooks / partition.DEFAULT
1 #!/bin/bash -x
2 # Copyright (C) 2016 Ian Kelling
3
4 # This program is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU General Public License
6 # as published by the Free Software Foundation; either version 2
7 # of the License, or (at your option) any later version.
8
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17
18 # todo /boot/chboot needs update for lvm i think?
19
# Trace prefix: show the script line number in front of every traced command.
PS4='+ $LINENO '
# Stop on the first failing command or pipeline stage; errtrace makes
# functions and subshells inherit the ERR trap installed below.
set -o errexit -o errtrace -o pipefail
trap 'echo "$0:$LINENO:error: \"$BASH_COMMAND\" returned $?" >&2' ERR

# Refuse to continue unless we are root.
[[ $EUID == 0 ]] || {
  echo "$0: error: need to run as root" >&2
  exit 1
}
28
29 # for calling outside of FAI without args:
30 # fai-redep
31 #
32 # source /b/fai/fai-wrapper
33 # - set any appropriate classes with: fai-setclass OPT1... which sets CLASS_OPT1=true...
34 # or run eval-fai-classfile FILE.
35 # - Set a VOL_DISTROVER (if not doing mkroot2) eg:
36 # fai-setclass VOL_NABIA
37 #
38 # ARGS (only 1 is valid):
39 #
40 # mkroot2: for running outside of fai and setting up the root2/boot2 luks and btrfs and tab files
41 #
42 # mkroot2tab: for running outside of fai and setting up the root2/boot2 tab files, in case luks and btrfs
43 # happen to already be setup.
44 #
45 # mktab: for running outside of fai and generating a crypttab for
46 # the main root fs in /tmp/fai. Must run with env var, eg export DISTRO=trisquelnabia.
47 #
48 # Example use in a bootstrap distro:
49 # scp /a/bin/fai/fai/config/{distro-install-common/devbyid,hooks/partition.DEFAULT} root@HOST:
50 # sl HOST
51 # export DISTRO=trisquelnabia; ./partition.DEFAULT mktab
52 # ## cryptsetup wont take within a pipeline
53 # mapfile -t lines < <(awk '! /swap/ {print $2,$1}' /tmp/fai/crypttab )
54 # for l in "${lines[@]}"; do cryptsetup luksOpen $l; done
55 #
56 # # or alternatively, to avoid typing it many times:
57 # read -r lukspw; for l in "${lines[@]}"; do yes "$lukspw" | cryptsetup luksOpen $l; done
58
59 ## potentially useful later:
60 # sed 's#/root/keyscript,#decrypt_keyctl,#;s/$/,noauto/' /tmp/fai/crypttab >/etc/crypttab
61 #
62 # environment variables:
63 #
64 # HOSTNAME: if frodo, we exclude 2 devices from the /boot array, which
65 # the bios does not see. if demohost, we set the luks password to just
66 # 'x'.
67 #
68 # SPECIAL_DISK: For use outside of fai. A base disk name like
69 # /dev/sdk. If set, we just cryptsetup and partition this one disk then
70 # exit. This is useful for partitioning a disk in preparation to replace
71 # a failed or failing disk from a raid10 array.
72 #
73 # classes:
74 #
75 # REPARTITION: forces repartitioning even if we detect the proper amount
76 # of partitions already exist.
77 #
78 # NOWIPE: use existing subvolumes if they exist
79 #
80 # REROOTFS: Don't reuse the root filesystem, even if we normally would
81 #
# ROTATIONAL: forces installing onto hdds instead of ssds. Normally ssds
# are chosen if they exist.
84 #
85 # PARTITION_PROMPT: command line prompt before partitioning. This is good
86 # to set if we don't expect repartitioning to happen.
87 #
88 # ROTATIONAL: in a system with ssd and hdd, install to the hdd
89 # instead of the default ssd.
90 #
91 # RAID0: forces raid0 filesystem. Normally with 4+ devices, we use
92 # raid10.
93 # RAID1: forces raid1 filesystem.
94 # RAID1c3: forces raid1c3 filesystem (btrfs raid 1, 3 copies).
95
# Mode flags. At most one of them becomes true, selected by the single
# optional command line argument.
mkroot2tab=false
mkroot2=false
mktab=false
if [[ -n $1 ]]; then
  ## duplicates fai-wrapper, for convenience of not needing it
  if ! type -t ifclass &>/dev/null; then
    # Succeeds when $1 equals this host's name or when the variable
    # CLASS_$1 is non-empty.
    ifclass() {
      local var=CLASS_$1
      [[ $HOSTNAME == "$1" || ${!var} ]]
    }
  fi

  case $1 in
    mkroot2|mkroot2tab|mktab)
      # The argument is also the name of the flag variable to switch on.
      printf -v "$1" true
      ;;
    *)
      echo "$0: error: unsupported arg: $1" >&2
      exit 1
      ;;
  esac
fi
124
125
# Handling a single SPECIAL_DISK always means repartitioning it.
[[ -z $SPECIAL_DISK ]] || export CLASS_REPARTITION=true

# fai's setup-storage won't do btrfs on luks, so we do it ourselves.
# Inspiration taken from files in the fai-setup-storage package.

# Tell fai to skip its own partition task. When we are not running in fai,
# skiptask is not defined; in that case just carry on.
skiptask partition || ! type skiptask
136
# Make a devbyid command available: use one found on PATH, otherwise look
# through the usual config directories and wrap the first script found in a
# function of the same name.
if ! type -p devbyid; then
  for d in $FAI/distro-install-common \
    /a/bin/fai/fai/config/distro-install-common $FAI $PWD; do
    if [[ -d $d && -e $d/devbyid ]]; then
      devbyid=$d/devbyid
      devbyid() { $devbyid "$@"; }
      break
    fi
  done
  [[ $devbyid ]] || {
    echo "$0: error: failed to find devbyid script" >&2
    exit 1
  }
fi
152
153 #### begin configuration
154
155 # this is the ordering of the /dev/sdaX, but
156 # the ordering of the partition layout goes like this:
157 # bios_grub
158 # grub_ext
159 # efi
160 # lvm
161 # root
162 # swap
163 # boot
164
165
166
167
168 ##### end configuration
169
170 ##### begin function defs
171
# Create a btrfs filesystem across the given devices.
# Globals:   raid_level (read) - 0 means mkfs.btrfs defaults; anything else
#            is used as both the metadata and the data profile (raidN).
# Arguments: the block devices that make up the filesystem.
# Returns:   the exit status of mkfs.btrfs.
bpart() { # btrfs a partition
  case $raid_level in
    0) mkfs.btrfs -f "$@" ;;
    # "$@" keeps each device argument intact instead of re-splitting it.
    *) mkfs.btrfs -f -m "raid$raid_level" -d "raid$raid_level" "$@" ;;
  esac
}
178
179
# Succeeds only when HOSTNAME is sy or bo; returns 1 on every other host.
zilap() {
  [[ $HOSTNAME == sy || $HOSTNAME == bo ]]
}
188
# Work out which LUKS key file and passphrase to use.
# Globals read:    FAI, HOSTNAME, mkroot2, luks_dir (optional preset)
# Globals written: luks_dir, luks_file, hostkeys, lukspw
# Exits 1 when no key directory or no usable host key file is found.
getluks() {
  if [[ -z $luks_dir ]]; then
    # see README for docs about how to create these
    luks_dir=$FAI/distro-install-common/luks
    [[ -d $luks_dir ]] || luks_dir=/q/root/luks
    if [[ ! -d $luks_dir ]]; then
      echo "$0: error: no luks_dir found" >&2
      exit 1
    fi
  fi

  luks_file=$luks_dir/host-$HOSTNAME
  if [[ ! -e $luks_file ]]; then
    # shellcheck disable=SC2206 # globbing is intended
    hostkeys=($luks_dir/host-*)
    # With exactly one key available we may be deploying somewhere dhcp
    # does not hand us a proper hostname, so fall back to that key.
    if (( ${#hostkeys[@]} != 1 )) || [[ ! -e ${hostkeys[0]} ]]; then
      echo "$0: error: no key for hostname at $luks_file" >&2
      exit 1
    fi
    luks_file=${hostkeys[0]}
  fi

  # note, corresponding changes in /b/ds/keyscript-{on,off}
  if ifclass demohost; then
    lukspw=x
  else
    # Prefer a per-host passphrase file, otherwise the shared one.
    local pw_file=$luks_dir/iank
    if [[ -e $luks_dir/$HOSTNAME ]]; then
      pw_file=$luks_dir/$HOSTNAME
    fi
    lukspw=$(cat $pw_file)
  fi

  # mkroot2 always uses amy's key file and passphrase.
  if $mkroot2; then
    luks_file=$luks_dir/host-amy
    lukspw=$(cat $luks_dir/amy)
  fi
}
230
231
# Succeeds when the FSF class is set (as decided by ifclass).
fsf() { ifclass FSF; }
235
236
# Print the size parted reports for a disk, in MiB, minus one.
# Arguments: $1 - disk to query; defaults to the global $dev.
# Outputs:   the integer result on stdout.
dev-mib() {
  local disk=${1:-$dev}
  # The machine-readable device line looks like /dev/NAME:<size>MiB:...;
  # sed extracts the digits of <size>.
  printf '%s\n' "$(( $(parted -m $disk unit MiB print |
    sed -nr "s#^/dev/[^:]+:([0-9]+).*#\1#p") - 1))"
}
242
# Format a device as LUKS, enroll both the key file and the passphrase, and
# open it.
# Globals:   luks_file (read) - key file; lukspw (read) - passphrase.
# Arguments: $1 - block device to format
#            $2 - device-mapper name to open it as
luks-setup() {
  local target_dev="$1"
  local mapper_name="$2"
  # when we move to newer than trisquel 9, we can remove
  # --type luks1. We can also check on cryptsetup --help | less /compil
  # to see about the other settings. Default in debian 9 is luks2.
  # You can convert from luks2 to luks 1 by adding a temporary key:
  # cryptsetup luksAddKey --pbkdf pbkdf2
  # then remove the new format keys with cryptsetup luksRemoveKey
  # then cryptsetup convert DEV --type luks1, then readd old keys and
  # remove temp.
  #
  # 141 is accepted as success for both pipelines: it is what pipefail
  # reports when "yes" is killed by SIGPIPE once cryptsetup stops reading.
  yes YES | cryptsetup luksFormat $target_dev $luks_file || [[ $? == 141 ]]
  yes "$lukspw" | cryptsetup luksAddKey --key-file $luks_file $target_dev \
    || [[ $? == 141 ]]
  # background: key file and password are just two ways to input a
  # passphrase, so their contents need not differ; we keep both because a
  # really big randomly generated passphrase makes some security sense.
  #
  # This would remove the keyfile.
  # yes 'test' | cryptsetup luksRemoveKey /dev/... \
  #   /key/file || [[ $? == 141 ]]
  cryptsetup luksOpen $target_dev $mapper_name --key-file $luks_file
}
268
# Generate /tmp/fai/fstab, /tmp/fai/disk_var.sh and, for encrypted
# non-bootstrap installs, /tmp/fai/crypttab.
# Globals read:    boot_devs, DISTRO, first_boot_dev, first_root_dev,
#                  first_efi, boot_vol, mopts, vgs, short_devs, root_devs,
#                  swap_devs, HOSTNAME, even_bigsuf
# Globals written: dev, fstabstd
mktab() {
  mkdir -p /tmp/fai
  dev=${boot_devs[0]}
  fstabstd="x-systemd.device-timeout=30s,x-systemd.mount-timeout=30s"

  # The bootstrap distro lives directly on the boot filesystem: a two line
  # fstab and a minimal disk_var.sh are all it needs.
  if [[ $DISTRO == debianbullseye_bootstrap ]]; then
    cat > /tmp/fai/fstab <<EOF
$first_boot_dev / btrfs noatime,subvol=$boot_vol 0 0
$first_efi /boot/efi vfat nofail,$fstabstd 0 0
EOF
    cat >/tmp/fai/disk_var.sh <<EOF
BOOT_DEVICE="${short_devs[@]}"
ROOT_PARTITION=$first_boot_dev
EOF
    return
  fi

  # note, fai creates the mountpoints listed here
  cat > /tmp/fai/fstab <<EOF
$first_root_dev / btrfs $fstabstd,noatime,subvol=root_$DISTRO$mopts 0 0
$first_root_dev /mnt/root btrfs nofail,$fstabstd,noatime,subvolid=0$mopts 0 0
$first_boot_dev /boot btrfs nofail,$fstabstd,noatime,subvol=$boot_vol 0 0
$first_efi /boot/efi vfat nofail,$fstabstd 0 0
$first_boot_dev /mnt/boot btrfs nofail,$fstabstd,noatime,subvolid=0 0 0
EOF
  rm -f /tmp/fai/crypttab

  # One swap entry per volume group: the plain LV for fsf, otherwise the
  # crypt mapping, which also needs root and swap crypttab lines.
  for vg in ${vgs[@]}; do
    if fsf; then
      cat >> /tmp/fai/fstab <<EOF
/dev/$vg/swap none swap nofail,$fstabstd,sw 0 0
EOF
    else
      cat >>/tmp/fai/crypttab <<EOF
crypt-$vg-root /dev/$vg/root none keyscript=/root/keyscript,discard,luks,initramfs
crypt-$vg-swap /dev/$vg/swap /dev/urandom swap,cipher=aes-xts-plain64,size=256,hash=ripemd160
EOF
      cat >> /tmp/fai/fstab <<EOF
/dev/mapper/crypt-$vg-swap none swap nofail,$fstabstd,sw 0 0
EOF
    fi
  done

  # fai would do this:
  #BOOT_DEVICE=\${BOOT_DEVICE:-"${devs[0]}"}

  # note: swaplist seems to do nothing.
  cat >/tmp/fai/disk_var.sh <<EOF
BOOT_DEVICE="${short_devs[@]}"
BOOT_PARTITION=\${BOOT_PARTITION:-$first_boot_dev}
# ROOT_PARTITIONS is added by me, used in arch setup.
ROOT_PARTITIONS="${root_devs[@]}"
ROOT_PARTITION=\${ROOT_PARTITION:-$first_root_dev}
SWAPLIST=\${SWAPLIST:-"${swap_devs[@]}"}
EOF

  if [[ $HOSTNAME == kd ]]; then
    # note, having these with keyscript and initramfs causes a luks error
    # in fai.log, but it is safely ignorable and lets us type our password
    # just once at boot. A downside is that they probably need to be
    # plugged in to boot.
    cat >>/tmp/fai/crypttab <<EOF
crypt_dev_ata-Samsung_SSD_870_QVO_8TB_S5VUNG0N900656V${even_bigsuf} /dev/disk/by-id/ata-Samsung_SSD_870_QVO_8TB_S5VUNG0N900656V${even_bigsuf} none keyscript=decrypt_keyctl,discard,luks,initramfs
crypt_dev_ata-TOSHIBA_MD04ACA500_84R2K773FS9A-part1 /dev/disk/by-id/ata-TOSHIBA_MD04ACA500_84R2K773FS9A-part1 none keyscript=decrypt_keyctl,discard,luks,initramfs
crypt_dev_ata-ST6000DM001-1XY17Z_Z4D29EBL-part1 /dev/disk/by-id/ata-ST6000DM001-1XY17Z_Z4D29EBL-part1 none keyscript=decrypt_keyctl,discard,luks,initramfs
EOF
    cat >> /tmp/fai/fstab <<EOF
# r7 = root partition7. it isnt actually #7 anymore, not a great name, but whatever
/dev/mapper/crypt_dev_ata-Samsung_SSD_870_QVO_8TB_S5VUNG0N900656V${even_bigsuf} /mnt/r7 btrfs nofail,$fstabstd,noatime,compress=zstd,subvolid=0 0 0
/dev/mapper/crypt_dev_ata-TOSHIBA_MD04ACA500_84R2K773FS9A-part1 /mnt/rust1 btrfs nofail,$fstabstd,noatime,compress=zstd,subvolid=0 0 0
/dev/mapper/crypt_dev_ata-ST6000DM001-1XY17Z_Z4D29EBL-part1 /mnt/rust2 btrfs nofail,$fstabstd,noatime,compress=zstd,subvolid=0 0 0
EOF
  fi
}
342
343
344
345 #### root2 non-fai run
346
347 # todo: update for lvm
# Set up (mkroot2) or just describe (mkroot2tab) the root2/boot2 filesystems
# when run outside of fai, then exit the script.
# Globals read:    partition, mkroot2, vgs, boot2_devs, luks_file, mopts
# Globals written: root2_devs, vg
# Side effects:    rewrites /mnt/root/root2-crypttab and
#                  /mnt/root/root2-fstab; with mkroot2 also formats LUKS and
#                  btrfs on the root2/boot2 volumes.
# Exits 1 if a repartition was requested, otherwise exits 0 when done.
doroot2() {

  # We write to these files instead of just /etc/fstab, /etc/crypttab,
  # because these are filesystems created after our current root, and so
  # this allows us to update other root filesystems too.
  rm -f /mnt/root/root2-{fs,crypt}tab
  if $partition; then
    # Errors go to stderr like every other error message in this script.
    echo "$0: error: found partition=true but have mkroot2 arg" >&2
    exit 1
  fi
  root2_devs=()
  for vg in "${vgs[@]}"; do
    root2_devs+=("/dev/mapper/crypt-$vg-root2")
    if $mkroot2; then
      luks-setup "/dev/$vg/root2" "crypt-$vg-root2"
    fi
    cat >>/mnt/root/root2-crypttab <<EOF
crypt-$vg-root2 /dev/$vg/root2 $luks_file discard,luks,initramfs
EOF
  done
  if $mkroot2; then
    bpart "${root2_devs[@]}"
    bpart "${boot2_devs[@]}"
  fi
  mkdir -p /mnt/root2 /mnt/boot2
  cat >>/mnt/root/root2-fstab <<EOF
${root2_devs[0]} /mnt/root2 btrfs nofail,x-systemd.device-timeout=30s,x-systemd.mount-timeout=30s,noatime,subvolid=0$mopts 0 0
${boot2_devs[0]} /mnt/boot2 btrfs nofail,x-systemd.device-timeout=30s,x-systemd.mount-timeout=30s,noatime,subvolid=0 0 0
EOF
  exit 0
}
379
380
381 ##### end function defs
382
383
384 ##### begin variable setup
# partition: true forces repartitioning (and with it a full wipe). The
# REPARTITION class turns it on here; the partition check further down may
# also turn it on.
if ifclass REPARTITION; then
  partition=true
else
  partition=false
fi

# wipe: recreate subvolumes unless the NOWIPE class is set.
if ifclass NOWIPE; then
  wipe=false
else
  wipe=true
fi

# rerootfs: the REROOTFS class asks for a fresh root filesystem.
if ifclass REROOTFS; then
  rerootfs=true
else
  rerootfs=false
fi

# Extra btrfs mount options: turn on zstd compression when there are more
# than 2 processing units.
if (( $(nproc) > 2 )); then
  mopts=,compress=zstd
fi
402
# disk_excludes[KERNEL_NAME]=true marks disks that must not be installed to.
declare -A disk_excludes
# Only a normal fai run excludes mounted disks, and the USE_MOUNTED class
# opts out of that. (The "&&" before "! ifclass" was missing, which turned
# "! ifclass USE_MOUNTED" into ignored arguments of $mktab, so the class was
# never consulted.)
if ! $mkroot2 && ! $mkroot2tab && ! $mktab && ! ifclass USE_MOUNTED; then
  ## ignore disks that are mounted, eg when running from fai-cd
  while read -r l; do
    # Each line is KNAME="..." MOUNTPOINT="..." PKNAME="..." as printed by
    # lsblk -P; eval turns it into shell variables.
    eval "$l"
    if [[ ! $PKNAME ]]; then
      # No parent device: this entry is itself the disk.
      # shellcheck disable=SC2153 # not a misspelling
      PKNAME="$KNAME"
    fi
    if [[ $MOUNTPOINT ]]; then
      disk_excludes[$PKNAME]=true
    fi
  done < <(lsblk -nP -o KNAME,MOUNTPOINT,PKNAME)
fi
417
# Sort the candidate install disks into ssds and hdds using the kernel's
# rotational flag.
hdds=()
ssds=()
# this excludes "usb". vda disk has empty tran (transport). This may need
# adjustment for some new type we come across. cdrom has type "rom"
for disk in $(lsblk -ndo name,type,tran | awk '$3 ~ "^(sata|nvme|)$" && $2 == "disk" { print $1 }'); do
  if [[ ${disk_excludes[$disk]} ]]; then
    continue
  fi
  case $disk in
    # cdrom
    sr*) continue ;;
  esac
  rotational_file=/sys/block/$disk/queue/rotational
  case $(cat $rotational_file) in
    0)
      ssds+=("/dev/$disk")
      echo $0: found ssd /dev/$disk
      ;;
    1)
      hdds+=("/dev/$disk")
      echo $0: found hdd /dev/$disk
      ;;
    *)
      # Report the value from the same sysfs file we just inspected (the
      # old message read a relative $disk/queue/rotational path), on stderr.
      echo "$0: error: unknown $rotational_file: $(cat $rotational_file)" >&2
      exit 1
      ;;
  esac
done
443
444 # install all ssds, or if there are none, all hdds.
445 # Note, usb flash disks are seen as rotational, which is
446 # very odd, but convenient for ignoring them here.
# Use the hdds when the ROTATIONAL class is set or when no ssd was found;
# otherwise use the ssds.
if ifclass ROTATIONAL || (( ${#ssds[@]} == 0 )); then
  short_devs=("${hdds[@]}")
else
  short_devs=("${ssds[@]}")
fi
452
# GPT partition numbers, in creation order.
# lvm physical volume; it holds the root, swap and boot logical volumes.
pvn=1

# rootn=1
# root2n=2
# swapn=3
# bootn=4
# boot2n=5

efin=2
# ext partition so grub can write persistent variables and thus do a one
# time boot. grub can't write to btrfs or any cow fs because it is more
# complicated to do and they don't want to.
grub_extn=3
# bios boot partition,
# https://wiki.archlinux.org/index.php/GRUB
bios_grubn=4
# for an even raid (raid 1), when one disk is bigger, this partition goes
# on the big disk
even_bign=5
lastn=$even_bign

# Decide whether repartitioning is needed: any disk with fewer than $lastn
# existing partition nodes triggers it.
#blkid="$(blkid -s TYPE)"
for dev in ${short_devs[@]}; do
  $partition && break
  y=$(readlink -f $dev)
  # shellcheck disable=SC2206 # globbing is intended
  arr=($y?*)
  (( ${#arr[@]} >= lastn )) || partition=true
  # On one system, blkid is missing some partitions.
  # maybe we need a flag, like FUZZY_BLKID or something, so we
  # can check that at least some exist.
  # for x in "`rootdev`: TYPE=\"crypto_LUKS\"" "`bootdev`: TYPE=\"btrfs\""; do
  # echo "$blkid" | grep -Fx "$x" &>/dev/null || partition=true
  # done
done
490
# With the PARTITION_PROMPT class, wait for input on stdin before touching
# the disks.
if $partition && ifclass PARTITION_PROMPT; then
  printf '%s\n' \
    "Press any key except ctrl-c to continue and partition these drives:" \
    " ${short_devs[*]}"
  read -r
fi
496
# Build the per-disk lists: stable device path, volume group name, and the
# root and swap devices that will live in that volume group.
devs=()
vgs=()
root_devs=()
swap_devs=()
shopt -s extglob
partsuffix=-part
for short_dev in ${short_devs[@]}; do
  dev="$(devbyid $short_dev)"
  # no by-id link, assume we are in a vm and this is true for all devs.
  [[ $dev == */by-id/* ]] || partsuffix=

  # for vms, cant name a vg the same as the short device name, they
  # conflict: /dev/$vg is already taken
  dname=${dev##*/}
  vg=vg$dname
  vgs+=("$vg")
  devs+=("$dev")

  # fsf uses the logical volumes directly; everyone else goes through the
  # crypt mappings.
  if fsf; then
    lv_prefix=/dev/$vg/
  else
    lv_prefix=/dev/mapper/crypt-$vg-
  fi
  root_devs+=(${lv_prefix}root)
  swap_devs+=(${lv_prefix}swap)
done
first_root_dev=${root_devs[0]}
[[ ${devs[0]} ]] || {
  echo "$0: error: failed to detect devs" >&2
  exit 1
}
529
530
531
# Partition name suffixes: "$dev$suffix" is the path of each partition.
pvsuf=$partsuffix$pvn
efisuf=$partsuffix$efin
grub_extsuf=$partsuffix$grub_extn
bios_grubsuf=$partsuffix$bios_grubn
even_bigsuf=$partsuffix$even_bign


# Collect the boot/boot2 volumes and add up the disk space behind them.
boot_space=0
first=true
boot_devs=()
boot2_devs=()
for dev in ${devs[@]}; do
  vg=vg${dev##*/}
  # I ran into a machine (frodo) where the bios doesn't know about some
  # disks, so 1st stage of grub also doesn't know about them.
  # Also, grub does not support mounting degraded btrfs as far as
  # I can tell with some googling.
  # From within an arch install env, I could detect them by noting
  # their partitions were mixed with the next disk in /dev/disk/by-path,
  # and I have mixed model disks, and I could see the 8 models which showed
  # up in the bios, and thus see which 2 models were missing.
  # hdparm -I /dev/sdh will give model info in linux.
  # However, in fai on jessie, /dev/disk/by-path dir doesn't exist,
  # and I don't see another way, so I'm hardcoding them.
  # We still put grub on them and partition them the same, for uniformity
  # and in case they get moved to a system that can recognize them,
  # we just exclude them from the boot filesystem.
  cd /dev/disk/by-id/
  bad_disk=false
  for id in ata-TOSHIBA_MD04ACA500_8539K4TQFS9A \
    ata-TOSHIBA_MD04ACA500_Y5IFK6IJFS9A; do
    if [[ $(readlink -f $id) == "$(readlink -f $dev)" ]]; then
      bad_disk=true
      break
    fi
  done
  $bad_disk && continue

  boot_devs+=(/dev/$vg/boot)
  boot2_devs+=(/dev/$vg/boot2)
  # dev-mib prints this disk's size in MiB minus one.
  boot_space=$(( boot_space + $(dev-mib $dev) ))
  # The efi and grub_ext partitions of the first usable disk are the ones
  # recorded for later use.
  if $first; then
    first_efi=$dev$efisuf
    first_grub_extdev=$dev$grub_extsuf
    first=false
  fi
done
first_boot_dev=${boot_devs[0]}
582
# Choose the btrfs raid profile from the RAID* classes or, failing that,
# from the number of boot devices: 1 -> raid0, 2 -> raid1, 3 -> raid1c3,
# anything else -> raid10. raid_duplication is the number of copies kept.
boot_dev_count=${#boot_devs[@]}
even_raid=false
raid_duplication=2
if ifclass RAID0 || (( boot_dev_count == 1 )); then
  raid_level=0
  raid_duplication=1
elif ifclass RAID1 || (( boot_dev_count == 2 )); then
  raid_level=1
  # even_raid marks the exactly-two-disks case.
  if (( boot_dev_count == 2 )); then
    even_raid=true
  fi
elif ifclass RAID1c3 || (( boot_dev_count == 3 )); then
  raid_level=1c3
  raid_duplication=3
else
  raid_level=10
fi
600
601
602
### Begin calculate boot partition space
# Usable space shrinks by the number of copies the raid profile keeps
# (1 for raid0, 2 for raid1/raid10, 3 for raid1c3).
boot_space=$(( boot_space / raid_duplication ))

# Pick filesystem sizes (MiB) by how much usable space there is.
if fsf; then
  boot_mib=4000
elif (( boot_space > 900000 )); then
  # this is larger than needed for several /boot subvols,
  # because I keep a minimal debian install on it for
  # recovery needs and for doing pxe-kexec.
  boot_mib=10000
  root2_mib=500000
  boot2_mib=5000
elif (( boot_space > 30000 )); then
  boot_mib=$(( 5000 + (boot_space - 30000) / 2 ))
  root2_mib=100
  boot2_mib=100
else
  # Small vms don't have room for /boot recovery. With 3 kernels
  # installed, i'm using 132M on t8, so this seems like plenty of
  # room. note: rhel 8 recommends 1g for /boot. u20.04 with 3 kernels =
  # 308 mb, so things have grown significantly
  boot_mib=1000
  root2_mib=100
  boot2_mib=100
fi

# Per-disk volume sizes: total size times the number of copies, spread over
# the participating devices.
boot_part_mib=$(( boot_mib * raid_duplication / ${#boot_devs[@]} ))

# zilap hosts get no root2/boot2 space.
boot2_part_mib=0
root2_part_mib=0
if ! zilap; then
  boot2_part_mib=$(( boot2_mib * raid_duplication / ${#boot_devs[@]} ))
  root2_part_mib=$(( root2_mib * raid_duplication / ${#root_devs[@]} ))
fi
### end calculate boot partition space
641
642
# Derive DISTRO from the first VOL_* class that is set, unless DISTRO was
# already supplied. Entries are CLASS_SUFFIX:distro and are tried in order.
if [[ ! $DISTRO ]]; then
  for vol_map in \
    BULLSEYE_BOOTSTRAP:debianbullseye_bootstrap \
    STRETCH:debianstretch \
    BUSTER:debianbuster \
    BULLSEYE:debianbullseye \
    BOOKWORM:debianbookworm \
    TESTING:debiantesting \
    XENIAL:ubuntuxenial \
    BIONIC:ubuntubionic \
    FOCAL:ubuntufocal \
    JAMMY:ubuntujammy \
    FLIDAS:trisquelflidas \
    ETIONA:trisqueletiona \
    NABIA:trisquelnabia \
    ARAMO:trisquelaramo; do
    if ifclass VOL_${vol_map%%:*}; then
      DISTRO=${vol_map#*:}
      break
    fi
  done
  # Only the mkroot2 / mkroot2tab modes may run without a distro.
  if [[ ! $DISTRO ]] && ! $mkroot2 && ! $mkroot2tab; then
    echo "PARTITIONER ERROR: no distro class/var set" >&2
    exit 1
  fi
fi

# Name of the boot subvolume for this distro.
case $DISTRO in
  debianbullseye_bootstrap)
    # this is just convenience for the libreboot_grub config
    # so we can glob the other ones easier.
    boot_vol=$DISTRO
    ;;
  *)
    boot_vol=boot_$DISTRO
    ;;
esac
687
688
# Swap per disk in MiB: 1.5 x MemTotal (reported in kB) split over all devs.
# 1.5 x based on https://access.redhat.com/documentation/en-US/Red_Hat_Enterprise_Linux/7/html/Installation_Guide/sect-disk-partitioning-setup-x86.html#sect-custom-partitioning-x86
swap_mib=$(( $(awk '/^MemTotal:/ {print $2}' /proc/meminfo) * 3/(${#devs[@]} * 2 ) / 1024 ))

# From here on, globs that match nothing expand to nothing.
shopt -s nullglob
694 ##### end variable setup
695
696
697
698
# Dispatch on the requested mode. doroot2 exits the script itself; mktab
# mode stops after writing the tab files; a normal run writes the tab files
# and then loads the luks key material unless this is an fsf install.
if $mkroot2 || $mkroot2tab; then
  getluks
  doroot2
else
  mktab
  if $mktab; then
    exit 0
  fi
  fsf || getluks
fi
711
712
# Either wipe and repartition every install disk (partition=true), or reuse
# the existing layout and just open / recreate the root filesystems.
if $partition; then
  ### begin wipefs
  if [[ ! $SPECIAL_DISK ]]; then
    # Tear down all existing lvm state so the old volumes cannot come back.
    for lv in $(lvs --noheadings -o lv_path); do
      wipefs -a $lv
    done
    for vg in $(vgs --noheadings -o vgname); do
      vgchange -an $vg
      vgremove -ff $vg
    done
    for pv in $(pvs --noheadings -o pvname); do
      pvremove -ff $pv
    done
    for dev in ${devs[@]}; do
      # if we repartition to the same as an old partition,
      # we don't want any old fses hanging around.
      count_down=10
      # wipefs has failed, manual run works, google suggests timing issue
      while ! wipefs -a $dev; do
        sleep 2
        count_down=$((count_down - 1))
        (( count_down > 0 )) || exit 1
      done
    done
  fi
  ### end wipefs


  # When we have 2 disks of at least 100g difference in size,
  # make an extra partition on the end of the bigger one.
  even_big_part=false
  even_diff_min=100000
  if $even_raid; then
    smalli=0
    bigi=1
    if (( $(dev-mib ${devs[0]}) >= $(dev-mib ${devs[1]}) )); then
      smalli=1
      bigi=0
    fi
    # In an even raid both disks are laid out to the smaller disk's size.
    disk_mib=$(dev-mib ${devs[smalli]})
    even_big_dev=${devs[bigi]}
    even_big_mib=$(dev-mib $even_big_dev)
    if (( even_big_mib - disk_mib > even_diff_min )); then
      even_big_part=true
    fi
  fi

  for dev in ${devs[@]}; do
    # NOTE(review): with SPECIAL_DISK the vg name is still derived from the
    # first regular device, not from the special disk - confirm intended.
    vg=vg${dev##*/}
    if [[ $SPECIAL_DISK ]]; then
      dev=$(devbyid $SPECIAL_DISK)
    fi

    # parted will round up the disk size. Do -1 so we can have
    # fully 1MiB unit partitions for easy resizing of the last partition.
    # Otherwise we would pass in -0 for the end argument for the last
    # partition.
    #
    # Note: parted print error output is expected. example:
    # Error: /dev/vda: unrecognised disk label
    if ! $even_raid; then
      disk_mib=$(dev-mib)
    fi


    parted -s $dev mklabel gpt
    # MiB because parted complains about alignment otherwise.
    pcmd="parted -a optimal -s -- $dev"
    # main lvm partition
    $pcmd mkpart primary ext3 524MiB ${disk_mib}MiB
    $pcmd name $pvn pv

    pvcreate -y $dev$pvsuf
    vgcreate -y $vg $dev$pvsuf

    if fsf; then
      root_mib=40000
    else
      # 600 = uefi 512 + grubext 8 + bios grub 3 + some extra cuz this is lvm
      root_mib=$(( disk_mib - root2_part_mib - swap_mib - boot_part_mib - boot2_part_mib - 600 ))
    fi

    # -L unit default mebibyte
    lvcreate -y -L $root_mib $vg -n root
    lvcreate -y -L $swap_mib $vg -n swap
    # unencrypted swap needs mkswap
    if fsf; then
      mkswap /dev/$vg/swap
    fi
    lvcreate -y -L $boot_part_mib $vg -n boot

    # root2/boot2 exist only where space was reserved for them. Their sizes
    # are 0 on zilap hosts (and when no root2/boot2 size was chosen), and a
    # zero-size lvcreate would abort the run. The previous condition created
    # them only on zilap hosts, i.e. exactly when the size was 0.
    # todo: now that we are using lvm, this doesnt need to be done until
    # mkroot2
    if (( root2_part_mib > 0 )); then
      lvcreate -y -L $root2_part_mib $vg -n root2
    fi
    if (( boot2_part_mib > 0 )); then
      lvcreate -y -L $boot2_part_mib $vg -n boot2
    fi

    # uefi partition, for normal bios systems, its just in case.
    $pcmd mkpart primary "fat32" 12MiB 524MiB
    $pcmd name $efin efi
    # note, this is shown here: https://support.system76.com/articles/bootloader/
    # but not mentioned https://wiki.archlinux.org/index.php/EFI_system_partition
    # might not be needed
    $pcmd set $efin esp on

    # i only need a few k, but googling min size,
    # I found someone saying that gparted required
    # at least 8 because of their hard drive cylinder size.
    # And 8 is still very tiny.
    # grub_ext partition
    $pcmd mkpart primary "ext2" 4MiB 12MiB
    $pcmd name $grub_extn grubext
    # gpt ubuntu cloud image uses ~4 mb for this partition. fai uses 1 MiB.
    # so, I use 3, whatever.
    # note: parted manual says cheap flash media
    # should start at 4.
    # bios grub partition
    $pcmd mkpart primary "" 1MiB 4MiB
    $pcmd name $bios_grubn biosgrub
    $pcmd set $bios_grubn bios_grub on
    if $even_big_part && [[ $dev == "$even_big_dev" ]]; then
      $pcmd mkpart primary ext3 ${disk_mib}MiB ${even_big_mib}MiB
      $pcmd name $even_bign even_big
    fi

    # the mkfs failed before on a vm, which prompted me to add
    # sleep .1
    # then it failed again on a physical machine
    # with:
    # Device /dev/disk/by-id/foo doesn't exist or access denied,
    # so I added a wait until it existed.
    # Then I added the mkfs.ext2, which claimed to succeed,
    # but then couldn't be found upon reboot. In that case we didn't
    # wait at all. So I've added a 3 second minimum wait.
    secs=0
    while [[ ! -e $dev$bios_grubsuf ]] && (( secs < 10 )); do
      sleep 1
      secs=$((secs +1))
    done
    sleep 3

    mkfs.fat -F32 $dev$efisuf

    if ! fsf && $even_big_part && [[ $dev == "$even_big_dev" ]]; then
      # Encrypt the even_big *partition*. Formatting $even_big_dev itself
      # would write a LUKS header over the start of the whole disk and
      # destroy the partition table created above.
      luks-setup $even_big_dev$even_bigsuf ${even_big_dev##*/}
      mkfs.btrfs -f /dev/mapper/${even_big_dev##*/}
    fi

    # Holds just a single file, rarely written, so
    # use ext2, like was often used for the /boot partition.
    # This exists because grub can only persist data to a non-cow fs.
    # And we use persisting a var in grub to do a one time boot.
    # We could pass the data on the kernel command line and persist it
    # to grubenv after booting, but that relies on the boot always succeeding.
    # This is just a bit more robust, and it could work for booting
    # into ipxe which can't persist data, if we ever got that working.
    mkfs.ext2 $dev$grub_extsuf

    # for fsf, no encryption of root because root will not contain any
    # sensitive data.
    if ! fsf; then
      luks-setup /dev/$vg/root crypt-$vg-root
    fi

    if [[ $SPECIAL_DISK ]]; then
      exit 0
    fi
  done
  ls -la /dev/btrfs-control # this was probably for debugging...
  sleep 1

  bpart ${root_devs[@]}
  bpart ${boot_devs[@]}

else ## above: if $partition ##

  if ! fsf; then
    for vg in ${vgs[@]}; do
      # Already opened, nothing to do.
      if [[ -e /dev/mapper/crypt-$vg-root ]]; then
        continue
      fi
      if $rerootfs; then
        luks-setup /dev/$vg/root crypt-$vg-root
      else
        # Open under the same crypt-$vg-root name that root_devs, the
        # crypttab and the existence check above all use (it used to be
        # opened as $vg-root, which nothing else refers to).
        cryptsetup luksOpen /dev/$vg/root crypt-$vg-root \
          --key-file $luks_file
      fi
    done
  fi

  if $rerootfs; then
    sleep 1
    bpart ${root_devs[@]}
  fi
  sleep 1
fi
908
909
# Recreate the distro's root subvolume from scratch (skipped for NOWIPE and
# for the bootstrap distro, which doesn't use a separate encrypted root).
if $wipe && [[ $DISTRO != debianbullseye_bootstrap ]]; then
  mount -o subvolid=0 ${root_devs[0]} /mnt
  # systemd creates subvolumes we want to delete: remove everything nested
  # under root_$DISTRO (listed in reverse path order), then the subvolume
  # itself.
  mapfile -t s < <(btrfs subvolume list --sort=-path /mnt |
    sed -rn "s#^.*path\s*(root_$DISTRO/\S+)\s*\$#\1#p")
  for subvol in ${s[@]}; do
    btrfs subvolume delete /mnt/$subvol
  done
  btrfs subvolume set-default 0 /mnt
  if [[ -e /mnt/root_$DISTRO ]]; then
    btrfs subvolume delete /mnt/root_$DISTRO
  fi

  ## create subvols ##
  cd /mnt

  btrfs subvolume create root_$DISTRO

  # could set default subvol like this, but no reason to.
  # btrfs subvolume set-default \
  #   $(btrfs subvolume list . | grep "root_$DISTRO$" | awk '{print $2}') .

  # We keep cow on the root subvolume: for raid systems it allows error
  # correction, for non-raid systems it protects the root fs from having the
  # plug pulled. Reprovisioning a root subvol is not my favorite thing to do.
  # The previous approach, no longer used:
  # # no cow on the root filesystem. its setup is fully scripted,
  # # if it's messed up, we will just recreate it,
  # # and we can get better perf with this.
  # # I can't remember exactly why, but this is preferable to mounting with
  # # -o nodatacow, I think because subvolumes inherit that.
  # chattr -Rf +C root_$DISTRO
  cd /
  umount /mnt
fi
941
# Prepare the boot filesystem: grub2 config for libreboot plus this
# distro's boot subvolume.
mount -o subvolid=0 $first_boot_dev /mnt
cd /mnt
btrfs subvolume set-default 0 /mnt # already default, just ensuring it.

# for libreboot systems. grub2 only reads from subvolid=0
mkdir -p /mnt/grub2
# todo: this probably needs updating for our lvm transition
cp $FAI/distro-install-common/libreboot_grub.cfg /mnt/grub2

# When wiping, drop an existing boot subvolume; then make sure one exists.
if [[ -e /mnt/$boot_vol ]] && $wipe; then
  btrfs subvolume delete /mnt/$boot_vol
fi
[[ -e /mnt/$boot_vol ]] || btrfs subvolume create $boot_vol
cd /
umount /mnt
## end create subvols ##

# Record in the grubenv on the grub_ext filesystem that the fai check ran
# and which boot subvolume was set up.
mount $first_grub_extdev /mnt
grub-editenv /mnt/grubenv set did_fai_check=true
grub-editenv /mnt/grubenv set last_boot=/$boot_vol
umount /mnt
965
966
967 # initial setup of extra data fs, mounted,
968 # btrfs subvol create nocow
969 # chattr +C nocow
970 # chown iank.iank nocow
971