From: Ian Kelling Date: Fri, 2 Sep 2016 04:38:28 +0000 (-0700) Subject: fix pxe server leaving background procs X-Git-Url: https://iankelling.org/git/?a=commitdiff_plain;h=c4337bdfa287cb3160b294d58c64b51f870117ea;p=automated-distro-installer fix pxe server leaving background procs --- diff --git a/dsfull b/dsfull index 5d50eab..1fea3bc 100755 --- a/dsfull +++ b/dsfull @@ -1,10 +1,11 @@ #!/bin/bash -l # Copyright (C) 2016 Ian Kelling -# distro setup full. (assuming we already synced data files to the host) +# distro setup full using fai. (assuming we already synced data files to the host) set -eE -o pipefail -trap 'echo "$0:$LINENO:error: \"$BASH_COMMAND\" returned $?" >&2' ERR +cleanup() { :; } +trap 'cleanup; echo "$0:$LINENO:error: \"$BASH_COMMAND\" returned $?" >&2' ERR reboot=true if [[ $1 == -r ]]; then @@ -25,8 +26,20 @@ if $reboot; then ssh $host "touch /tmp/keyscript-off; sudo reboot" ||: fi -pxe-server fai $host -while ! ssh $host :; do +cleanup() { pxe-server :; } +pxe-server -a fai $host +cleanup() { :; } + +timedout=true +for ((i=0; i<240; i++)); do + if timeout -s 9 10 ssh $host :; then + timedout=false + break + fi sleep 5 done +if $timedout; then + echo "$0: error: timeout" + exit 1 +fi dsremote $host diff --git a/fai-revm b/fai-revm index 2361ecf..6271a36 100755 --- a/fai-revm +++ b/fai-revm @@ -22,8 +22,12 @@ set -x # Deploy fai configuration to faiserver, # then start a virtual machine to test the config. +cleanup() { :; } set -eE -o pipefail -trap 'echo "$0:$LINENO:error: \"$BASH_COMMAND\" returned $?" >&2' ERR +trap 'cleanup; echo "$0:$LINENO:error: \"$BASH_COMMAND\" returned $?" >&2' ERR +script_dir=$(dirname $(readlink -f "$BASH_SOURCE")) + +e() { echo "$*"; "$@"; } # I had this set false as default before, can't remember why. oh well. 
redeploy=true @@ -33,7 +37,6 @@ fi disk_count=2 -script_dir=$(dirname $(readlink -f "$BASH_SOURCE")) if [[ $script_dir == /a/bin/* ]]; then # Copy our script elsewhere so we can develop it @@ -53,13 +56,14 @@ is_arch_revm() { new_disk=false [[ ! $1 ]] || new_disk=true +cleanup() { ./pxe-server :; } if is_arch_revm; then - ./pxe-server arch & + ./pxe-server arch sleep 2 # via osinfo-query os. guessing arch is closest to latest fedora. variant=fedora22 else - ./pxe-server -a fai & + ./pxe-server fai sleep 2 if $redeploy; then ./fai-redep @@ -69,7 +73,7 @@ fi name=demohost -s virshrm $name ||: +e s virshrm $name ||: disk_arg=() @@ -78,7 +82,7 @@ for ((i=1; i <= disk_count; i++)); do disk_arg+=("--disk path=$f") if $new_disk || [[ ! -e $f ]]; then s rm -f $f - s qemu-img create -o preallocation=metadata -f qcow2 $f 20G + e s qemu-img create -o preallocation=metadata -f qcow2 $f 20G fi done @@ -91,7 +95,7 @@ fi # init_module+0x108/0x1000 [raid6_pq] # # uniq is to stop gtk-warning spam -s virt-install --os-variant $variant -n $name --pxe -r 2048 --vcpus 1 \ +e s virt-install --os-variant $variant -n $name --pxe -r 2048 --vcpus 1 \ ${disk_arg[*]} -w bridge=br0,mac=52:54:00:9c:ef:ad \ --graphics spice,listen=0.0.0.0 $console_arg |& grep -v '^ *$' | uniq & @@ -99,8 +103,10 @@ if [[ $SSH_CLIENT ]]; then fg fi - +sleep 30 +while ! timeout -s 9 10 ssh root@$name /bin/true; do sleep 5; done +cleanup() { :; } +pxe-server : if is_arch_revm; then - while ! 
timeout -s 9 10 ssh root@$name /bin/true; do sleep 1; done ./arch-init-remote $name fi diff --git a/faiserver-enable b/faiserver-enable index 66e0bee..2492b2e 100755 --- a/faiserver-enable +++ b/faiserver-enable @@ -15,4 +15,5 @@ if [[ $1 ]]; then usage 1 fi -ssh root@$(chost faiserver) "sed -ri --follow-symlinks 's,^\s*#\s*(/srv/fai/.*),\1,' /etc/exports; exportfs -ra" +host=$(chost faiserver) +ssh root@$host "sed -ri --follow-symlinks 's,^\s*#\s*(/srv/fai/.*),\1,' /etc/exports; exportfs -ra" diff --git a/faiserver-revm b/faiserver-revm index 2b2864a..e5237c3 100755 --- a/faiserver-revm +++ b/faiserver-revm @@ -5,10 +5,12 @@ set -x set -eE -o pipefail -trap 'echo "$0:$LINENO:error: \"$BASH_COMMAND\" returned $?" >&2' ERR +cleanup() { :; } +trap 'cleanup; echo "$0:$LINENO:error: \"$BASH_COMMAND\" returned $?" >&2' ERR cd "${BASH_SOURCE%/*}" +cleanup() { pxe-server :; } ./debian-pxe-preseed -i 192.168.1.1 -u ian -g vda name=faiserver @@ -28,6 +30,8 @@ while ! scp $opts faiserver-setup root@faiserver:; do sleep 5 done +# note: with a vm, pxe boot is turned off in the bios after its first reboot. +cleanup() { :; } ./pxe-server : ssh $opts root@faiserver ./faiserver-setup diff --git a/pxe-server b/pxe-server index 1f75adb..b8e2092 100755 --- a/pxe-server +++ b/pxe-server @@ -36,27 +36,28 @@ HOST makes the pxe server only for that specific host -h|--help Print help and exit -- Subsequent arguments are never treated as options --p Persist. Otherwise, wait for dhcp acks then remove. -r Don't redeploy fai config. --a Wait for 2 dhcp acks instead of the default 3. Some distros - do 2, some do 3. +-a Wait for 2 dhcp acks, then disable the pxe server after a delay. + First ack is for pxe boot, 2nd ack is for os boot. Sometimes + on debian, there is a 3rd one shortly after the 2nd. I can't remember + exactly why this caused a problem, but I'm hoping the sleep + will take care of it. 
EOF exit $1 } ##### begin command line parsing ######## -persist=false args=() redep=true -acks=3 +acks=2 +wait=false while [[ $1 ]]; do case $1 in --) shift; break ;; -h|--help) usage ;; -r) redep=false; shift ;; - -p) persist=true; shift ;; - -a) acks=2; shift ;; + -a) wait=true; shift ;; *) args+=("$1"); shift ;; esac done @@ -74,14 +75,9 @@ if [[ $host ]]; then host_tag="tag:$host," fi -case $type in - :|true) persist=true ;; - arch) acks=2 ;; -esac - ##### end command line parsing ######## -sv() { +e() { echo "$@" "$@" } @@ -123,7 +119,7 @@ ack-wait() { echo $line fi done < <(ssh wrt logread -f) - sv sleep 5 + e sleep 20 } set-pxe() { @@ -131,16 +127,18 @@ set-pxe() { if [[ $type == arch ]]; then arch-pxe-mount; fi" } -set-pxe +if ! $wait && [[ $type != : ]]; then + set-pxe -if [[ $type == fai ]]; then - if $redep; then - fai-redep + if [[ $type == fai ]]; then + if $redep; then + fai-redep + fi + faiserver-enable fi - faiserver-enable fi -if ! $persist; then +if $wait; then # fai's debian jessie 8.5ish does 2 dhcp requests when booting, # roughly 4 seconds apart. Earlier # versions did just 1. Now testing on a vm, it does 1. @@ -148,11 +146,15 @@ if ! $persist; then echo "waiting for $acks dhcp acks then disabling pxe" ack-wait $acks set-pxe : - if [[ $type == fai ]]; then - # fai server can contain sensitive info, so turn it off - # when it's not in use. - echo "waiting for 1 dhcp ack then disabling fai server" - ack-wait 1 - faiserver-disable - fi + + # previously tried waiting for one more ack then disabling faiserver, + # since it can contain sensitive info, so turn it off when not in use, + # but disabling that for now as it's inconvenient to clean this + # up and run it in the background etc. 
+ + # if [[ $type == fai ]]; then + # echo "waiting for 1 dhcp ack then disabling fai server" + # ack-wait 1 + # faiserver-disable + # fi fi diff --git a/wrt-setup b/wrt-setup index 755f2d4..dbef443 100755 --- a/wrt-setup +++ b/wrt-setup @@ -197,6 +197,7 @@ v cedit /etc/hosts <