fix mixed disk sizes & other multi disk issues
[automated-distro-installer] / fai / config / hooks / partition.DEFAULT
1 #!/bin/bash -x
2
3 set -eE -o pipefail
4 trap 'echo "$0:$LINENO:error: \"$BASH_COMMAND\" returned $?"' ERR
5
6 # # fai's setup-storage won't do btrfs on luks,
7 # # so we do it ourself :)
8
9 skiptask partition || ! type skiptask # for running not in fai
10
11 #### begin configuration
12
13 bootn=3
14 rootn=1
15 swapn=2
16 bios_grubn=4
17 boot_mib=1500
18
19
20 ##### end configuration
21
22 if ifclass REPARTITION;then
23 partition=true # force a full wipe
24 else
25 partition=false # change to true to force a full wipe
26 fi
27
28 lastn=$bios_grubn
29
30
31 hds=()
32 ssds=()
33 cd /sys/block
34 for disk in [sv]d[a-z]; do
35 case $(cat $disk/queue/rotational) in
36 0) ssds+=(/dev/$disk) ;;
37 1) hds+=(/dev/$disk) ;;
38 *) echo "$0: error: unknown /sys/block/$disk/queue/rotational: \
39 $(cat $disk/queue/rotational)"; exit 1 ;;
40 esac
41 done
42
43 # install all ssds, or if there are none, all hdds
44 if (( ${#ssds[@]} > 0 )); then
45 devs=( ${ssds[@]} )
46 else
47 devs=( ${hds[@]} )
48 fi
49
50 boot_devs=()
51 for dev in ${devs[@]}; do
52 if ifclass frodo; then
53 # I ran into a machine where the bios doesn't know about some disks,
54 # so 1st stage of grub also doesn't know about them.
55 # Also, grub does not support mounting degraded btrfs as far as
56 # I can tell with some googling.
57 # From within an arch install env, I could detect them by noting
58 # their partitions were mixed with the next disk in /dev/disk/by-path,
59 # and I have mixed model disks, and I could see the 8 models which showed
60 # up in the bios, and thus see which 2 models were missing.
61 # hdparm -I /dev/sdh will give model info in linux.
62 # However, in fai on jessie, that dir doesn't exist,
63 # and I don't see another way, so I'm hardcoding them.
64 # We still put grub on them and partition them the same, for uniformity
65 # and in case they get moved to a system that can recognize them,
66 # we just exclude them from the boot filesystem.
67 cd /dev/disk/by-id/
68 bad_disk=false
69 for id in ata-TOSHIBA_MD04ACA500_8539K4TQFS9A \
70 ata-TOSHIBA_MD04ACA500_Y5IFK6IJFS9A; do
71 if [[ $(readlink -f $id) == $dev ]]; then
72 bad_disk=true
73 break
74 fi
75 done
76 $bad_disk || boot_devs+=($dev$bootn)
77 else
78 boot_devs+=($dev$bootn)
79 fi
80 done
81
82 if [[ ! $DISTRO ]]; then
83 if ifclass STABLE; then
84 DISTRO=debianjessie
85 else
86 DISTRO=debiantesting
87 fi
88 fi
89
90
91
92 case ${#boot_devs[@]} in
93 # need double the space if we are raid 10, and then
94 # might as well give some extra overhead.
95 [4-9]*|[1-3]?*) boot_mib=$((boot_mib * 3)) ;;
96 esac
97
98
99 bpart() { # btrfs a partition
100 dev_n=$1
101 case ${#@} in
102 [1-3]) mkfs.btrfs -f $@ ;;
103 [4-9]*|[1-3]?*) mkfs.btrfs -f -m raid10 -d raid10 $@ ;;
104 esac
105 }
106
107 first_boot_dev=${devs[0]}$bootn
108
109 crypt_devs=()
110 # somewhat crude detection of whether to partition
111 for dev in ${devs[@]}; do
112 crypt_devs+=( /dev/mapper/crypt_dev_${dev#/dev/} )
113 x=($dev[0-9])
114 [[ ${#x[@]} == ${lastn} ]] || partition=true
115 for (( i=1; i <= $lastn; i++ )); do
116 [[ -e ${dev}$i ]] || partition=true
117 done
118 for part in $dev$rootn $dev$bootn; do
119 # type tells us it's not totally blank
120 blkid | grep "^${part}:.*TYPE=" &>/dev/null || partition=true
121 done
122 done
123
124 # keyfiles generated like:
125 # head -c 2048 /dev/urandom | od | s dd of=/q/root/luks/host-demohost
126 luks_dir=${LUKS_DIR:-/var/lib/fai/config/distro-install-common/luks}
127 if ifclass tp; then
128 lukspw=$(cat $luks_dir/traci)
129 else
130 lukspw=$(cat $luks_dir/ian)
131 fi
132 if ifclass demohost; then
133 lukspw=x
134 fi
135
136
137 crypt=${crypt_devs[0]}$rootn
138
139 bios_grub_end=4
140 # 1.5 x based on https://access.redhat.com/documentation/en-US/Red_Hat_Enterprise_Linux/7/html/Installation_Guide/sect-disk-partitioning-setup-x86.html#sect-custom-partitioning-x86
141 swap_mib=$(( $(grep ^MemTotal: /proc/meminfo | \
142 awk '{print $2}') * 3/(${#devs[@]} * 2 ) / 1024 ))
143
144 mkdir -p /tmp/fai
145 shopt -s nullglob
146 if $partition; then
147 for dev in ${devs[@]}; do
148 for x in $dev[0-9]; do
149 count_down=10
150 # wipefs has failed, manual run works, google suggests timing issue
151 while ! wipefs -a $x; do
152 sleep 2
153 count_down=$((count_down - 1))
154 (( count_down > 0 )) || exit 1
155 done
156 done
157 done
158 for dev in ${devs[@]}; do
159 # parted will round up the disk size. Do -1 so we can have
160 # fully 1MiB unit partitions for easy resizing of the last partition.
161 # Otherwise we would pass in -0 for the end argument for the last partition.
162 disk_mib=$(( $(parted -m $dev unit MiB print | \
163 sed -nr "s#^$dev:([0-9]+).*#\1#p") - 1))
164 root_end=$(( disk_mib - swap_mib - boot_mib / ${#boot_devs[@]} ))
165 swap_end=$(( root_end + swap_mib))
166
167 parted -s $dev mklabel gpt
168 # gpt ubuntu cloud image uses ~4. fai uses 1 MiB.
169 # I read something in the parted manual saying cheap flash media
170 # likes to start at 4.
171 # MiB because parted complains about alignment otherwise.
172 pcmd="parted -a optimal -s -- $dev"
173 $pcmd mkpart primary "ext3" 4MiB ${root_end}MiB
174 $pcmd mkpart primary "linux-swap" ${root_end}MiB ${swap_end}MiB
175 $pcmd mkpart primary "" ${swap_end}MiB ${disk_mib}MiB
176 $pcmd mkpart primary "" 1MiB 4MiB
177 $pcmd set $bios_grubn bios_grub on
178 $pcmd set $bootn boot on # generally not needed on modern systems
179 # the mkfs failed randomly on a vm, so I threw a sleep in here.
180 sleep .1
181
182 luks_dev=$dev$rootn
183 yes YES | cryptsetup luksFormat $luks_dev $luks_dir/host-$HOSTNAME \
184 -c aes-cbc-essiv:sha256 -s 256 || [[ $? == 141 ]]
185 yes "$lukspw" | \
186 cryptsetup luksAddKey --key-file $luks_dir/host-$HOSTNAME \
187 $luks_dev || [[ $? == 141 ]]
188 # background: Keyfile and password are treated just
189 # like 2 ways to input a passphrase, so we don't actually need to have
190 # different contents of keyfile and passphrase, but it makes some
191 # security sense to a really big randomly generated passphrase
192 # as much as possible, so we have both.
193 #
194 # This would remove the keyfile.
195 # yes 'test' | cryptsetup luksRemoveKey /dev/... \
196 # /key/file || [[ $? == 141 ]]
197
198 cryptsetup luksOpen $luks_dev crypt_dev_${luks_dev##/dev/} \
199 --key-file $luks_dir/host-$HOSTNAME
200 done
201 bpart ${crypt_devs[@]/%/$rootn}
202 bpart ${boot_devs[@]}
203 else
204 for dev in ${devs[@]}; do
205 cryptsetup luksOpen $dev$rootn crypt_dev_${dev##/dev/}$rootn \
206 --key-file $luks_dir/host-$HOSTNAME || [[ $? == 141 ]]
207 done
208 sleep 1
209 fi
210
211 mount -o subvolid=0 $crypt /mnt
212 # systemd creates subvolumes we want to delete.
213 s=($(btrfs subvolume list --sort=-path /mnt |
214 sed -rn "s#^.*path\s*(root_$DISTRO/\S+)\s*\$#\1#p"))
215 for subvol in ${s[@]}; do btrfs subvolume delete /mnt/$subvol; done
216 btrfs subvolume set-default 0 /mnt
217 [[ ! -e /mnt/root_$DISTRO ]] || btrfs subvolume delete /mnt/root_$DISTRO
218
219
220 ## create subvols ##
221 cd /mnt
222 for x in q home_$DISTRO root_$DISTRO; do
223 btrfs subvolume list . | grep "$x$" >/dev/null || btrfs subvolume create $x
224 chown root:1000 q
225 done
226 mkdir -p /mnt/root_$DISTRO/boot
227 for x in root/a q/a; do
228 mkdir -p $x
229 chown 1000:1000 $x
230 chmod 755 $x
231 done
232 # could set default like this, but no reason to.
233 # btrfs subvolume set-default \
234 # $(btrfs subvolume list . | grep "root_$DISTRO$" | awk '{print $2}') .
235 chattr -Rf +C root_$DISTRO
236 cd /
237 umount /mnt
238 mount -o subvolid=0 $first_boot_dev /mnt
239 cd /mnt
240 btrfs subvolume set-default 0 /mnt
241 [[ ! -e /mnt/boot_$DISTRO ]] || btrfs subvolume delete /mnt/boot_$DISTRO
242 btrfs subvolume create boot_$DISTRO
243 cd /
244 umount /mnt
245 ## end create subvols ##
246
247
248
249 cat > /tmp/fai/fstab <<EOF
250 $crypt / btrfs noatime,subvol=root_$DISTRO 0 0
251 $crypt /q btrfs noatime,subvol=q 0 0
252 /q/a /a none bind 0 0
253 $crypt /home btrfs noatime,subvol=home_$DISTRO 0 0
254 $first_boot_dev /boot btrfs noatime,subvol=boot_$DISTRO 0 0
255 EOF
256
257 swaps=()
258 for dev in ${devs[@]}; do
259 s=crypt_swap_${dev##/dev/}$swapn
260 swaps+=(/dev/mapper/$s)
261 cat >>/tmp/fai/crypttab <<EOF
262 crypt_dev_${dev##/dev/}$rootn $dev$rootn none keyscript=/root/keyscript,discard,luks
263 $s $dev$swapn /dev/urandom swap,cipher=aes-xts-plain64,size=256,hash=ripemd160
264 EOF
265 cat >> /tmp/fai/fstab <<EOF
266 /dev/mapper/$s none swap sw 0 0
267 EOF
268 done
269
270 # fai would do this:
271 #BOOT_DEVICE=\${BOOT_DEVICE:-"${devs[0]}"}
272
273 # swaplist seems to do nothing.
274 cat >/tmp/fai/disk_var.sh <<EOF
275 ROOT_PARTITION=\${ROOT_PARTITION:-$crypt}
276 BOOT_PARTITION=\${BOOT_PARTITION:-$first_boot_dev}
277 BOOT_DEVICE="${devs[@]}"
278 SWAPLIST=\${SWAPLIST:-"${swaps[@]}"}
279 EOF