#!/bin/bash
#
# Kdump common variables and functions
#

. /usr/lib/kdump/kdump-lib-initramfs.sh

FADUMP_ENABLED_SYS_NODE="/sys/kernel/fadump/enabled"
FADUMP_REGISTER_SYS_NODE="/sys/kernel/fadump/registered"

# Return 0 if $1 is a regular file that objdump identifies as a PE image
# (pei-x86-64 or pei-aarch64-little) containing a ".linux" section.
# $1: path of the image to check
is_uki()
{
	local img

	img="$1"

	[[ -f "$img" ]] || return
	[[ "$(objdump -a "$img" 2> /dev/null)" =~ pei-(x86-64|aarch64-little) ]] || return
	objdump -h -j .linux "$img" &> /dev/null
}

# Return 0 if $FADUMP_ENABLED_SYS_NODE exists and contains 1, 1 otherwise.
is_fadump_capable()
{
	# keep the scratch value out of the caller's scope
	local rc

	# Check if firmware-assisted dump is enabled
	# if no, fallback to kdump check
	if [[ -f $FADUMP_ENABLED_SYS_NODE ]]; then
		rc=$(< "$FADUMP_ENABLED_SYS_NODE")
		[[ $rc -eq 1 ]] && return 0
	fi
	return 1
}

# Return 0 if the kernel log (via journalctl) reports AMD SME or SEV
# memory encryption as active.
is_sme_or_sev_active()
{
	journalctl -q --dmesg --grep "^Memory Encryption Features active: AMD (SME|SEV)$" > /dev/null 2>&1
}

# Return 0 if the squashfs, overlay and loop modules can all be resolved by
# modprobe for the kdump kernel version, 1 as soon as one of them cannot.
is_squash_available()
{
	local _version kmodule

	_version=$(_get_kdump_kernel_version)
	for kmodule in squashfs overlay loop; do
		modprobe -S "$_version" --dry-run "$kmodule" &> /dev/null || return 1
	done
}

# Return 0 if an executable "zstd" is found in PATH.
is_zstd_command_available()
{
	[[ -x "$(command -v zstd)" ]]
}

# Return 0 unless dracut reports "unrecognized option" for $1.
# $1: the dracut option to probe
dracut_have_option()
{
	local _option=$1

	! dracut "$_option" 2>&1 | grep -q "unrecognized option"
}

# Log all arguments through derror, then exit the shell with status 1.
perror_exit()
{
	derror "$@"
	exit 1
}

# Check if fence kdump is configured in Pacemaker cluster
is_pcs_fence_kdump()
{
	# no pcs or fence_kdump_send executables installed?
	type -P pcs > /dev/null || return 1
	[[ -x $FENCE_KDUMP_SEND ]] || return 1

	# fence kdump not configured?
	(pcs cluster cib | grep 'type="fence_kdump"') &> /dev/null || return 1
}

# Check if fence_kdump is configured using kdump options
is_generic_fence_kdump()
{
	[[ -x $FENCE_KDUMP_SEND ]] || return 1

	[[ $(kdump_get_conf_val fence_kdump_nodes) ]]
}

# Print the device name for a device specification.
# $1: a device path, "UUID=..." or "LABEL=..."; double quotes are dropped.
# UUID/LABEL forms are resolved with blkid, anything else is echoed as is.
to_dev_name()
{
	local dev="${1//\"/}"

	case "$dev" in
	UUID=*)
		blkid -U "${dev#UUID=}"
		;;
	LABEL=*)
		blkid -L "${dev#LABEL=}"
		;;
	*)
		echo "$dev"
		;;
	esac
}

# Return 0 if the kdump configuration has a value for one of the dump target
# options, or if dracut_args carries a --mount option.
is_user_configured_dump_target()
{
	[[ $(kdump_get_conf_val "ext[234]\|xfs\|btrfs\|minix\|raw\|nfs\|ssh\|virtiofs") ]] ||
		is_mount_in_dracut_args
}

# Print the block (or virtiofs) dump target; print nothing for ssh/nfs targets.
# Sources are tried in order: the filesystem/raw option of the kdump
# configuration, the target given in dracut_args, then the target backing
# the save path.
get_block_dump_target()
{
	local _target _fstype

	if is_ssh_dump_target || is_nfs_dump_target; then
		return
	fi

	_target=$(kdump_get_conf_val "ext[234]\|xfs\|btrfs\|minix\|raw\|virtiofs")
	[[ -n $_target ]] && to_dev_name "$_target" && return

	_target=$(get_dracut_args_target "$(kdump_get_conf_val "dracut_args")")
	[[ -b $_target ]] && to_dev_name "$_target" && return

	_fstype=$(get_dracut_args_fstype "$(kdump_get_conf_val "dracut_args")")
	is_fs_type_virtiofs "$_fstype" && echo "$_target" && return

	_target=$(get_target_from_path "$(get_save_path)")
	[[ -b $_target ]] && to_dev_name "$_target" && return

	_fstype=$(get_fs_type_from_target "$_target")
	is_fs_type_virtiofs "$_fstype" && echo "$_target" && return
}

# Return 0 if the configured failure_action (or legacy "default") value is
# dump_to_rootfs.
is_dump_to_rootfs()
{
	[[ $(kdump_get_conf_val 'failure_action\|default') == dump_to_rootfs ]]
}

# Return 0 if there is a block dump target and is_lvm2_thinp_device accepts it.
is_lvm2_thinp_dump_target()
{
	# local, so a caller's own _target is not overwritten
	local _target

	_target=$(get_block_dump_target)
	[ -n "$_target" ] && is_lvm2_thinp_device "$_target"
}

# When dump_to_rootfs is configured, print the root filesystem target:
# the block device name, the virtiofs target, or "nfs" as the last resort.
# Prints nothing otherwise.
get_failure_action_target()
{
	local _target

	if is_dump_to_rootfs; then
		# Get rootfs device name
		_target=$(get_root_fs_device)
		[[ -b $_target ]] && to_dev_name "$_target" && return
		is_fs_type_virtiofs "$(get_fs_type_from_target "$_target")" && echo "$_target" && return
		# Then, must be nfs root
		echo "nfs"
	fi
}

# Get kdump targets(including root in case of dump_to_rootfs).
# Prints a space-separated list on stdout: the block dump target, or "ssh",
# or "nfs", followed by the failure-action target when it is set and differs
# from the first entry.
get_kdump_targets()
{
	local _target _root
	local kdump_targets

	_target=$(get_block_dump_target)
	if [[ -n $_target ]]; then
		kdump_targets=$_target
	elif is_ssh_dump_target; then
		kdump_targets="ssh"
	else
		kdump_targets="nfs"
	fi

	# Add the root device if dump_to_rootfs is specified.
	_root=$(get_failure_action_target)
	if [[ -n $_root ]] && [[ $kdump_targets != "$_root" ]]; then
		kdump_targets="$kdump_targets $_root"
	fi

	echo "$kdump_targets"
}

# Return the bind mount source path, return the path itself if it's not bind mounted
# Eg. if /path/to/src is bind mounted to /mnt/bind, then:
# /mnt/bind -> /path/to/src, /mnt/bind/dump -> /path/to/src/dump
#
# findmnt uses the option "-v, --nofsroot" to exclude the [/dir]
# in the SOURCE column for bind-mounts, then if $_src equals to
# $_src_nofsroot, the mountpoint is not bind mounted directory.
#
# Below is just an example for mount info
# /dev/mapper/atomicos-root[/ostree/deploy/rhel-atomic-host/var], if the
# directory is bind mounted. The former part represents the device path, rest
# part is the bind mounted directory which quotes by bracket "[]".
# $1: path to resolve
# Prints the resolved path on stdout.
get_bind_mount_source()
{
	local _mnt _path _src _opt _fstype
	local _fsroot _src_nofsroot

	_mnt=$(df "$1" | tail -1 | awk '{print $NF}')
	# quote the pattern so glob characters in the mount point match literally
	_path=${1#"$_mnt"}

	_src=$(get_mount_info SOURCE target "$_mnt" -f)
	_opt=$(get_mount_info OPTIONS target "$_mnt" -f)
	_fstype=$(get_mount_info FSTYPE target "$_mnt" -f)

	# bind mount in fstab
	if [[ -d $_src ]] && [[ $_fstype == none ]] && (echo "$_opt" | grep -q "\bbind\b"); then
		echo "$_src$_path" && return
	fi

	# direct mount
	_src_nofsroot=$(get_mount_info SOURCE target "$_mnt" -v -f)
	if [[ $_src_nofsroot == "$_src" ]]; then
		echo "$_mnt$_path" && return
	fi

	# strip "<device>[" and the trailing "]" to obtain the fs root directory
	_fsroot=${_src#"${_src_nofsroot}"\[}
	_fsroot=${_fsroot%\]}
	_mnt=$(get_mount_info TARGET source "$_src_nofsroot" -f)

	# for btrfs, _fsroot will also contain the subvol value as well, strip it
	if [[ $_fstype == btrfs ]]; then
		local _subvol
		_subvol=${_opt#*subvol=}
		_subvol=${_subvol%,*}
		_fsroot=${_fsroot#"$_subvol"}
	fi
	echo "$_mnt$_fsroot$_path"
}

# Print the mount options of the mount whose source is $1.
get_mntopt_from_target()
{
	get_mount_info OPTIONS source "$1" -f
}

# Get the path where the target will be mounted in kdump kernel
# $1: kdump target device
get_kdump_mntpoint_from_target()
{
	local _mntpoint

	_mntpoint=$(get_mntpoint_from_target "$1")
	# mount under /sysroot if dump to root disk,
	# mount under /kdumproot if dump target is not mounted in first kernel,
	# mount under /kdumproot/$_mntpoint in other cases in 2nd kernel.
	# systemd will be in charge to umount it.
	if [[ -z $_mntpoint ]]; then
		_mntpoint="/kdumproot"
	elif [[ $_mntpoint == "/" ]]; then
		_mntpoint="/sysroot"
	else
		_mntpoint="/kdumproot/$_mntpoint"
	fi

	# strip duplicated "/"
	echo "$_mntpoint" | tr -s "/"
}

# Print the persistent device name (via get_persistent_dev) for $1.
# $1: a device path, "UUID=..." or "LABEL=..."; double quotes are dropped.
kdump_get_persistent_dev()
{
	local dev="${1//\"/}"

	case "$dev" in
	UUID=*)
		dev=$(blkid -U "${dev#UUID=}")
		;;
	LABEL=*)
		dev=$(blkid -L "${dev#LABEL=}")
		;;
	esac
	echo "$(get_persistent_dev "$dev")"
}

# Return 0 if /run/ostree-booted exists as a regular file.
is_ostree()
{
	test -f /run/ostree-booted
}

# get ip address or hostname from nfs/ssh config value
get_remote_host()
{
	local _config_val=$1

	# ipv6 address in kdump.conf is around with "[]",
	# factor out the ipv6 address
	_config_val=${_config_val#*@}
	_config_val=${_config_val%:/*}
	_config_val=${_config_val#\[}
	_config_val=${_config_val%\]}
	echo "$_config_val"
}

# Return 0 if $1 looks like a hostname: it contains no ":" and at least
# one ASCII letter.
is_hostname()
{
	# anything with a colon is treated as an IPv6 address
	[[ $1 == *:* ]] && return 1

	[[ $1 =~ [a-zA-Z] ]]
}

# Copied from "/etc/sysconfig/network-scripts/network-functions"
# $1: network interface name; prints its hardware address in upper case.
get_hwaddr()
{
	if [[ -f "/sys/class/net/$1/address" ]]; then
		awk '{ print toupper($0) }' < "/sys/class/net/$1/address"
	elif [[ -d "/sys/class/net/$1" ]]; then
		LC_ALL="" LANG="" ip -o link show "$1" 2> /dev/null |
			awk '{ print toupper(gensub(/.*link\/[^ ]* ([[:alnum:]:]*).*/, "\\1", 1)); }'
	fi
}

# Get value by a field using "nmcli -g"
# Usage: get_nmcli_value_by_field <field> <nmcli command>
#
# "nmcli --get-values" allows us to retrieve value(s) by field, for example,
# nmcli --get-values <field> connection show /org/freedesktop/NetworkManager/ActiveConnection/1
# returns the following value for the corresponding field respectively,
#   Field                                  Value
#   IP4.DNS                                "10.19.42.41 | 10.11.5.19 | 10.5.30.160"
#   802-3-ethernet.s390-subchannels        ""
#   bond.options                           "mode=balance-rr"
get_nmcli_value_by_field()
{
	LANG=C nmcli --get-values "$@"
}

# Get nmcli field value of an connection apath (a D-Bus active connection path)
# Usage: get_nmcli_field_by_conpath <field> <apath>
get_nmcli_field_by_conpath()
{
	local _field=$1 _apath=$2

	get_nmcli_value_by_field "$_field" connection show "$_apath"
}

# Get nmcli connection apath (a D-Bus active connection path ) by ifname
#
# apath is used for nmcli connection operations, e.g.
# $ nmcli connection show $apath
get_nmcli_connection_apath_by_ifname()
{
	local _ifname=$1

	get_nmcli_value_by_field "GENERAL.CON-PATH" device show "$_ifname"
}

# Print the first ifcfg file whose DEVICE= entry equals $1 (case-insensitive).
get_ifcfg_by_device()
{
	grep -E -i -l "^[[:space:]]*DEVICE=\"*${1}\"*[[:space:]]*$" \
		/etc/sysconfig/network-scripts/ifcfg-* 2> /dev/null | head -1
}

# Print the first ifcfg file whose HWADDR= entry equals $1 (case-insensitive).
get_ifcfg_by_hwaddr()
{
	grep -E -i -l "^[[:space:]]*HWADDR=\"*${1}\"*[[:space:]]*$" \
		/etc/sysconfig/network-scripts/ifcfg-* 2> /dev/null | head -1
}

# Print the first ifcfg file whose UUID= entry equals $1 (case-insensitive).
get_ifcfg_by_uuid()
{
	grep -E -i -l "^[[:space:]]*UUID=\"*${1}\"*[[:space:]]*$" \
		/etc/sysconfig/network-scripts/ifcfg-* 2> /dev/null | head -1
}

# Print the first ifcfg file whose NAME= entry equals $1 (case-insensitive).
get_ifcfg_by_name()
{
	grep -E -i -l "^[[:space:]]*NAME=\"*${1}\"*[[:space:]]*$" \
		/etc/sysconfig/network-scripts/ifcfg-* 2> /dev/null | head -1
}

# Return 0 if nmcli reports NetworkManager as "running".
is_nm_running()
{
	[[ "$(LANG=C nmcli -t --fields running general status 2> /dev/null)" == "running" ]]
}

# Return 0 if nmcli lists device $1 as connected or connecting.
is_nm_handling()
{
	LANG=C nmcli -t --fields device,state dev status 2> /dev/null |
		grep -q "^\(${1}:connected\)\|\(${1}:connecting.*\)$"
}

# $1: netdev name
# Print the ifcfg file of the active NetworkManager connection of $1, if any.
get_ifcfg_nmcli()
{
	local nm_uuid nm_name
	local ifcfg_file

	# Get the active nmcli config name of $1
	if is_nm_running && is_nm_handling "${1}"; then
		# The configuration "uuid" and "name" generated by nm is wrote to
		# the ifcfg file as "UUID=<uuid>" and "NAME=<name>".
		nm_uuid=$(LANG=C nmcli -t --fields uuid,device c show --active 2> /dev/null |
			grep "${1}" | head -1 | cut -d':' -f1)
		nm_name=$(LANG=C nmcli -t --fields name,device c show --active 2> /dev/null |
			grep "${1}" | head -1 | cut -d':' -f1)
		ifcfg_file=$(get_ifcfg_by_uuid "${nm_uuid}")
		[[ -z ${ifcfg_file} ]] && ifcfg_file=$(get_ifcfg_by_name "${nm_name}")
	fi

	echo -n "${ifcfg_file}"
}

# $1: netdev name
# Print the ifcfg file for $1, looked up by file name, NAME=, HWADDR= and
# finally DEVICE=.
get_ifcfg_legacy()
{
	local ifcfg_file hwaddr

	ifcfg_file="/etc/sysconfig/network-scripts/ifcfg-${1}"
	[[ -f ${ifcfg_file} ]] && echo -n "${ifcfg_file}" && return

	ifcfg_file=$(get_ifcfg_by_name "${1}")
	[[ -f ${ifcfg_file} ]] && echo -n "${ifcfg_file}" && return

	hwaddr=$(get_hwaddr "${1}")
	if [[ -n $hwaddr ]]; then
		ifcfg_file=$(get_ifcfg_by_hwaddr "${hwaddr}")
		[[ -f ${ifcfg_file} ]] && echo -n "${ifcfg_file}" && return
	fi

	ifcfg_file=$(get_ifcfg_by_device "${1}")

	echo -n "${ifcfg_file}"
}

# $1: netdev name
# Return the ifcfg file whole name(including the path) of $1 if any.
get_ifcfg_filename()
{
	local ifcfg_file

	ifcfg_file=$(get_ifcfg_nmcli "${1}")
	if [[ -z ${ifcfg_file} ]]; then
		ifcfg_file=$(get_ifcfg_legacy "${1}")
	fi

	echo -n "${ifcfg_file}"
}

# returns 0 when omission of a module is desired in dracut_args
# returns 1 otherwise
# $1: dracut module name
is_dracut_mod_omitted()
{
	local dracut_mod=$1

	# word splitting of the configured dracut_args is intended here
	# shellcheck disable=SC2046
	set -- $(kdump_get_conf_val dracut_args)
	while [ $# -gt 0 ]; do
		case $1 in
		-o | --omit)
			# only the word right after the option is inspected
			[[ " ${2//[^[:alnum:]]/ } " == *" $dracut_mod "* ]] && return 0
			;;
		esac
		shift
	done

	return 1
}

# Return 0 if any /sys/class/watchdog/*/state file reads "active".
is_wdt_active()
{
	local active dir

	[[ -d /sys/class/watchdog ]] || return 1
	for dir in /sys/class/watchdog/*; do
		[[ -f "$dir/state" ]] || continue
		active=$(< "$dir/state")
		[[ $active == "active" ]] && return 0
	done
	return 1
}

# Return 0 if dracut_args contains one of dracut's compression options.
have_compression_in_dracut_args()
{
	[[ "$(kdump_get_conf_val dracut_args)" =~ (^|[[:space:]])--(gzip|bzip2|lzma|xz|lzo|lz4|zstd|no-compress|compress|squash-compressor)([[:space:]]|$) ]]
}

# If "dracut_args" contains "--mount" information, use it
# directly without any check(users are expected to
# ensure its correctness).
is_mount_in_dracut_args()
{
	[[ " $(kdump_get_conf_val dracut_args)" =~ .*[[:space:]]--mount[=[:space:]].* ]]
}

# Print the amount of memory reserved for the crash kernel, read from the
# fadump node when fadump is enabled and from kexec_crash_size otherwise.
get_reserved_mem_size()
{
	local reserved_mem_size=0

	if is_fadump_capable; then
		reserved_mem_size=$(< /sys/kernel/fadump/mem_reserved)
	else
		reserved_mem_size=$(< /sys/kernel/kexec_crash_size)
	fi

	echo "$reserved_mem_size"
}

# Return 0 if some memory is reserved for the crash kernel; otherwise log
# an error and return 1.
check_crash_mem_reserved()
{
	local mem_reserved

	mem_reserved=$(get_reserved_mem_size)
	if [[ $mem_reserved -eq 0 ]]; then
		derror "No memory reserved for crash kernel"
		return 1
	fi

	return 0
}

# Return 0 if the kernel exposes kexec_crash_loaded and crash memory is
# reserved; otherwise log an error and return 1.
check_kdump_feasibility()
{
	if [[ ! -e /sys/kernel/kexec_crash_loaded ]]; then
		derror "Kdump is not supported on this kernel"
		return 1
	fi
	# the status of this check is the status of the function
	check_crash_mem_reserved
}

# Return 0 if the sysfs node for the given dump mode reads 1.
# $1: dump mode, "kdump" or "fadump"
# Logs an error and returns 1 for an unknown mode or a missing sysfs node.
is_kernel_loaded()
{
	local _sysfs _mode

	_mode=$1

	case "$_mode" in
	kdump)
		_sysfs="/sys/kernel/kexec_crash_loaded"
		;;
	fadump)
		_sysfs="$FADUMP_REGISTER_SYS_NODE"
		;;
	*)
		derror "Unknown dump mode '$_mode' provided"
		return 1
		;;
	esac

	if [[ ! -f $_sysfs ]]; then
		derror "$_mode is not supported on this kernel"
		return 1
	fi

	[[ $(< "$_sysfs") -eq 1 ]]
}

#
# This function returns the "apicid" of the boot
# cpu (cpu 0) if present.
#
get_bootcpu_apicid()
{
	# one rule per line; track the current "processor" entry and print the
	# apicid field that follows processor 0
	awk '
		BEGIN { CPU = "-1"; }
		$1 == "processor" && $2 == ":" { CPU = $NF; }
		CPU == "0" && /^apicid/ { print $NF; }
	' /proc/cpuinfo
}

# This function checks iomem and determines if we have more than
# 4GB of ram available. Returns 1 if we do, 0 if we don't
need_64bit_headers()
{
	return "$(tail -n 1 /proc/iomem |
		awk '{ split ($1, r, "-"); print (strtonum("0x" r[2]) > strtonum("0xffffffff")); }')"
}

# Check if secure boot is being enforced.
#
# Per Peter Jones, we need check efivar SecureBoot-$(the UUID) and
# SetupMode-$(the UUID), they are both 5 bytes binary data. The first four
# bytes are the attributes associated with the variable and can safely be
# ignored, the last bytes are one-byte true-or-false variables. If SecureBoot
# is 1 and SetupMode is 0, then secure boot is being enforced.
#
# Assume efivars is mounted at /sys/firmware/efi/efivars.
# Returns 0 when secure boot is detected as enforced, 1 otherwise.
is_secure_boot_enforced()
{
	local secure_boot_file setup_mode_file
	local secure_boot_byte setup_mode_byte

	# On powerpc, secure boot is enforced if:
	#   host secure boot: /ibm,secureboot/os-secureboot-enforcing DT property exists
	#   guest secure boot: /ibm,secure-boot >= 2
	if [[ -f /proc/device-tree/ibm,secureboot/os-secureboot-enforcing ]]; then
		return 0
	fi
	if [[ -f /proc/device-tree/ibm,secure-boot ]] &&
		[[ $(lsprop /proc/device-tree/ibm,secure-boot | tail -1) -ge 2 ]]; then
		return 0
	fi

	# Detect secure boot on x86 and arm64
	secure_boot_file=$(find /sys/firmware/efi/efivars -name "SecureBoot-*" 2> /dev/null)
	setup_mode_file=$(find /sys/firmware/efi/efivars -name "SetupMode-*" 2> /dev/null)

	if [[ -f $secure_boot_file ]] && [[ -f $setup_mode_file ]]; then
		# the fifth byte carries the value, see the description above
		secure_boot_byte=$(hexdump -v -e '/1 "%d\ "' "$secure_boot_file" | cut -d' ' -f 5)
		setup_mode_byte=$(hexdump -v -e '/1 "%d\ "' "$setup_mode_file" | cut -d' ' -f 5)

		if [[ $secure_boot_byte == "1" ]] && [[ $setup_mode_byte == "0" ]]; then
			return 0
		fi
	fi

	# Detect secure boot on s390x
	if [[ -e "/sys/firmware/ipl/secure" && "$(< /sys/firmware/ipl/secure)" == "1" ]]; then
		return 0
	fi

	return 1
}

#
# prepare_kexec_args <kexec args>
# This function prepares kexec argument.
# Prints the resulting argument string on stdout.
# NOTE: ARCH is assigned without "local" and stays visible to the caller.
#
prepare_kexec_args()
{
	local kexec_args=$1
	local found_elf_args

	ARCH=$(uname -m)
	if [[ $ARCH == "i686" ]] || [[ $ARCH == "i386" ]]; then
		need_64bit_headers
		if [[ $? == 1 ]]; then
			found_elf_args=$(echo "$kexec_args" | grep elf32-core-headers)
			if [[ -n $found_elf_args ]]; then
				dwarn "Warning: elf32-core-headers overrides correct elf64 setting"
			else
				kexec_args="$kexec_args --elf64-core-headers"
			fi
		else
			found_elf_args=$(echo "$kexec_args" | grep elf64-core-headers)
			if [[ -z $found_elf_args ]]; then
				kexec_args="$kexec_args --elf32-core-headers"
			fi
		fi
	fi

	# For secureboot enabled machines, use new kexec file based syscall.
	# Old syscall will always fail as it does not have capability to do
	# kernel signature verification.
	if is_secure_boot_enforced; then
		dinfo "Secure Boot is enabled. Using kexec file based syscall."
		kexec_args="$kexec_args -s"
	fi

	echo "$kexec_args"
}

# prepare_kdump_kernel <kernel version>
# This function return kdump_kernel given a kernel version.
# Every directory of the boot dir list is searched for every candidate image
# name; the first existing file is printed (nothing if none is found).
prepare_kdump_kernel()
{
	local kdump_kernelver=$1
	local dir img boot_dirlist boot_imglist kdump_kernel machine_id boot_img

	read -r machine_id < /etc/machine-id
	boot_dirlist=${KDUMP_BOOTDIR:-"/boot /boot/efi /efi /"}
	boot_imglist="$KDUMP_IMG-$kdump_kernelver$KDUMP_IMG_EXT \
		$machine_id/$kdump_kernelver/$KDUMP_IMG \
		EFI/Linux/$machine_id-$kdump_kernelver.efi"

	# The kernel of OSTree based systems is not in the standard locations.
	if is_ostree; then
		boot_dirlist="$(echo /boot/ostree/*) $boot_dirlist"
	fi

	# Use BOOT_IMAGE as reference if possible, strip the GRUB root device prefix in (hd0,gpt1) format
	boot_img="$(grep -P -o '^BOOT_IMAGE=(\S+)' /proc/cmdline | sed "s/^BOOT_IMAGE=\((\S*)\)\?\(\S*\)/\2/")"
	if [[ "$boot_img" == *"$kdump_kernelver" ]]; then
		boot_imglist="$boot_img $boot_imglist"
	fi

	# both lists are split on whitespace on purpose
	for dir in $boot_dirlist; do
		for img in $boot_imglist; do
			if [[ -f "$dir/$img" ]]; then
				kdump_kernel=$(echo "$dir/$img" | tr -s '/')
				break 2
			fi
		done
	done
	echo "$kdump_kernel"
}

# Return 0 if /usr/lib/modules/$1/modules.dep exists.
_is_valid_kver()
{
	[[ -f /usr/lib/modules/$1/modules.dep ]]
}

# This function is introduced since 64k variant may be installed on 4k or vice versa
# $1 the kernel path name.
# Prints the kernel version parsed from the path, or nothing (with a debug
# message) when no candidate is a valid kernel version.
parse_kver_from_path()
{
	local _img _kver

	[[ -z "$1" ]] && return

	_img=$1

	# same token source as prepare_kdump_kernel uses for its image names
	BLS_ENTRY_TOKEN=$(< /etc/machine-id)

	# Fedora standard installation, i.e. $BOOT/vmlinuz-<version>
	_kver=${_img##*/vmlinuz-}
	_kver=${_kver%"$KDUMP_IMG_EXT"}
	if _is_valid_kver "$_kver"; then
		echo "$_kver"
		return
	fi

	# BLS recommended image names, i.e. $BOOT/<token>/<version>/linux
	_kver=${_img##*/"$BLS_ENTRY_TOKEN"/}
	_kver=${_kver%%/*}
	if _is_valid_kver "$_kver"; then
		echo "$_kver"
		return
	fi

	# Fedora UKI installation, i.e. $BOOT/efi/EFI/Linux/<token>-<version>.efi
	_kver=${_img##*/"$BLS_ENTRY_TOKEN"-}
	_kver=${_kver%.efi}
	if _is_valid_kver "$_kver"; then
		echo "$_kver"
		return
	fi

	ddebug "Could not parse version from $_img"
}

# Print the kernel version to use for kdump: $KDUMP_KERNELVER if set,
# otherwise the running kernel; for a running debug kernel the non-debug
# version is preferred when its image can be found.
_get_kdump_kernel_version()
{
	local _version _version_nondebug

	if [[ -n "$KDUMP_KERNELVER" ]]; then
		echo "$KDUMP_KERNELVER"
		return
	fi

	_version=$(uname -r)
	# only "+debug" and "-debug" suffixes mark a debug kernel
	if [[ ! "$_version" =~ [+-]debug$ ]]; then
		echo "$_version"
		return
	fi

	_version_nondebug=${_version%+debug}
	_version_nondebug=${_version_nondebug%-debug}
	if [[ -f "$(prepare_kdump_kernel "$_version_nondebug")" ]]; then
		dinfo "Use of debug kernel detected. Trying to use $_version_nondebug"
		echo "$_version_nondebug"
	else
		dinfo "Use of debug kernel detected but cannot find $_version_nondebug. Falling back to $_version"
		echo "$_version"
	fi
}

#
# Detect initrd and kernel location, results are stored in global environmental variables:
# KDUMP_BOOTDIR, KDUMP_KERNELVER, KDUMP_KERNEL, DEFAULT_INITRD, and KDUMP_INITRD
#
# Expects KDUMP_BOOTDIR, KDUMP_IMG, KDUMP_IMG_EXT, KDUMP_KERNELVER to be loaded from config already
# and will prefer already set values so user can specify custom kernel/initramfs location
#
# NOTE: kdump_initrd_base is also assigned without "local".
#
prepare_kdump_bootinfo()
{
	local boot_initrdlist default_initrd_base var_target_initrd_dir
	local initrd

	KDUMP_KERNELVER=$(_get_kdump_kernel_version)
	KDUMP_KERNEL=$(prepare_kdump_kernel "$KDUMP_KERNELVER")

	if ! [[ -e $KDUMP_KERNEL ]]; then
		derror "Failed to detect kdump kernel location"
		return 1
	fi

	# For 64k variant, e.g. vmlinuz-5.14.0-327.el9.aarch64+64k-debug
	if [[ "$KDUMP_KERNEL" == *"+debug" || "$KDUMP_KERNEL" == *"64k-debug" ]]; then
		dwarn "Using debug kernel, you may need to set a larger crashkernel than the default value."
	fi

	# Set KDUMP_BOOTDIR to where kernel image is stored
	if is_uki "$KDUMP_KERNEL"; then
		KDUMP_BOOTDIR=/boot
	else
		KDUMP_BOOTDIR=$(dirname "$KDUMP_KERNEL")
	fi

	# Default initrd should just stay aside of kernel image, try to find it in KDUMP_BOOTDIR
	boot_initrdlist="initramfs-$KDUMP_KERNELVER.img initrd"
	for initrd in $boot_initrdlist; do
		if [[ -f "$KDUMP_BOOTDIR/$initrd" ]]; then
			default_initrd_base="$initrd"
			DEFAULT_INITRD="$KDUMP_BOOTDIR/$default_initrd_base"
			break
		fi
	done

	# Create kdump initrd basename from default initrd basename
	# initramfs-5.7.9-200.fc32.x86_64.img => initramfs-5.7.9-200.fc32.x86_64kdump.img
	# initrd => initrdkdump
	if [[ -z $default_initrd_base ]]; then
		kdump_initrd_base=initramfs-${KDUMP_KERNELVER}kdump.img
	elif [[ $default_initrd_base == *.* ]]; then
		kdump_initrd_base=${default_initrd_base%.*}kdump.${DEFAULT_INITRD##*.}
	else
		kdump_initrd_base=${default_initrd_base}kdump
	fi

	# Place kdump initrd in $(/var/lib/kdump) if $(KDUMP_BOOTDIR) not writable
	if [[ ! -w $KDUMP_BOOTDIR ]]; then
		var_target_initrd_dir="/var/lib/kdump"
		mkdir -p "$var_target_initrd_dir"
		KDUMP_INITRD="$var_target_initrd_dir/$kdump_initrd_base"
	else
		KDUMP_INITRD="$KDUMP_BOOTDIR/$kdump_initrd_base"
	fi
}

# Print the (de-duplicated) driver names modprobe resolves for the modalias
# of every /sys/class/watchdog device.
get_watchdog_drvs()
{
	local _wdtdrvs _drv _dir i

	for _dir in /sys/class/watchdog/*; do
		# device/modalias will return driver of this device
		[[ -f "$_dir/device/modalias" ]] || continue
		_drv=$(< "$_dir/device/modalias")
		_drv=$(modprobe --set-version "$KDUMP_KERNELVER" -R "$_drv" 2> /dev/null)
		for i in $_drv; do
			if ! [[ " $_wdtdrvs " == *" $i "* ]]; then
				_wdtdrvs="$_wdtdrvs $i"
			fi
		done
	done

	echo "$_wdtdrvs"
}

# Split the kernel command line $1 into one "<option> <value>" line per
# parameter; the value is empty for flag-like parameters.
_cmdline_parse()
{
	local opt val

	while read -r opt; do
		if [[ $opt =~ = ]]; then
			val=${opt#*=}
			opt=${opt%%=*}
			# ignore options like 'foo='
			[[ -z $val ]] && continue
			# xargs removes quotes, add them again
			[[ $val =~ [[:space:]] ]] && val="\"$val\""
		else
			val=""
		fi

		echo "$opt $val"
	done <<< "$(echo "$1" | xargs -n 1 echo)"
}

#
# prepare_cmdline <commandline> <commandline remove> <commandline append>
# This function performs a series of edits on the command line.
# The final result is printed on stdout.
# $1: base command line, defaults to the content of /proc/cmdline
# $2: parameters to remove
# $3: string to append
prepare_cmdline()
{
	local in out append opt val id drv
	local -A remove

	in=${1:-$(< /proc/cmdline)}
	while read -r opt val; do
		[[ -n "$opt" ]] || continue
		remove[$opt]=1
	done <<< "$(_cmdline_parse "$2")"
	append=$3

	# These params should always be removed
	remove[crashkernel]=1
	remove[panic_on_warn]=1

	# Always remove "root=X", as we now explicitly generate all kinds
	# of dump target mount information including root fs.
	#
	# We do this before KDUMP_COMMANDLINE_APPEND, if one really cares
	# about it(e.g. for debug purpose), then can pass "root=X" using
	# KDUMP_COMMANDLINE_APPEND.
	remove[root]=1

	# With the help of "--hostonly-cmdline", we can avoid some inheritance.
	remove[rd.lvm.lv]=1
	remove[rd.luks.uuid]=1
	remove[rd.dm.uuid]=1
	remove[rd.md.uuid]=1
	remove[fcoe]=1

	# Remove netroot, rd.iscsi.initiator and iscsi_initiator since
	# we get duplicate entries for the same in case iscsi code adds
	# it as well.
	remove[netroot]=1
	remove[rd.iscsi.initiator]=1
	remove[iscsi_initiator]=1

	while read -r opt val; do
		[[ -n "$opt" ]] || continue
		[[ -n "${remove[$opt]}" ]] && continue

		if [[ -n "$val" ]]; then
			out+="$opt=$val "
		else
			out+="$opt "
		fi
	done <<< "$(_cmdline_parse "$in")"

	out+="$append "

	id=$(get_bootcpu_apicid)
	if [[ -n "${id}" ]]; then
		out+="disable_cpu_apicid=$id "
	fi

	# If any watchdog is used, set it's pretimeout to 0. pretimeout let
	# watchdog panic the kernel first, and reset the system after the
	# panic. If the system is already in kdump, panic is not helpful
	# and only increase the chance of watchdog failure.
	for drv in $(get_watchdog_drvs); do
		out+="$drv.pretimeout=0 "

		if [[ $drv == hpwdt ]]; then
			# hpwdt have a special parameter kdumptimeout, it is
			# only supposed to be set to non-zero in first kernel.
			# In kdump, non-zero value could prevent the watchdog
			# from resetting the system.
			out+="$drv.kdumptimeout=0 "
		fi
	done

	# Always disable gpt-auto-generator as it hangs during boot of the
	# crash kernel. Furthermore we know which disk will be used for dumping
	# (if at all) and add it explicitly.
	is_uki "$KDUMP_KERNEL" && out+="rd.systemd.gpt_auto=no "

	# Trim unnecessary whitespaces
	echo "$out" | sed -e "s/^ *//g" -e "s/ *$//g" -e "s/ \+/ /g"
}

PROC_IOMEM=/proc/iomem
#get system memory size i.e. memblock.memory.total_size in the unit of GB
get_system_size()
{
	local sum

	# sed turns every "System RAM" range into "+ 0x<end> - 0x<start> + 1";
	# the resulting expression is evaluated by the arithmetic expansion
	sum=$(sed -n "s/\s*\([0-9a-fA-F]\+\)-\([0-9a-fA-F]\+\) : System RAM$/+ 0x\2 - 0x\1 + 1/p" $PROC_IOMEM)
	echo $(( (sum) / 1024 / 1024 / 1024))
}

# Return the recommended size for the reserved crashkernel memory
# depending on the system memory size.
#
# This functions is expected to be consistent with the parse_crashkernel_mem()
# in kernel i.e. how kernel allocates the kdump memory given the crashkernel
# parameter crashkernel=range1:size1[,range2:size2,…] and the system memory
# size.
# $1: system memory size in GB
# $2: crashkernel range list
# Prints the size of the first matching range, or "0M" if none matches.
get_recommend_size()
{
	local mem_size=$1
	local _ck_cmdline=$2
	local range start start_unit end end_unit size

	while read -r -d , range; do
		# need to use non-default IFS as double spaces are used as a
		# single delimiter while commas aren't...
		IFS=, read -r start start_unit end end_unit size <<< \
			"$(echo "$range" | sed -n "s/\([0-9]\+\)\([GT]\?\)-\([0-9]*\)\([GT]\?\):\([0-9]\+[MG]\)/\1,\2,\3,\4,\5/p")"

		# aka. 102400T
		end=${end:-104857600}
		[[ "$end_unit" == T ]] && end=$((end * 1024))
		[[ "$start_unit" == T ]] && start=$((start * 1024))
		if [[ $mem_size -ge $start ]] && [[ $mem_size -lt $end ]]; then
			echo "$size"
			return
		fi

		# append a ',' as read expects the 'file' to end with a delimiter
	done <<< "$_ck_cmdline,"

	# no matching range found
	echo "0M"
}

# Return 0 if the mlx5_core PCI driver directory exists in sysfs.
has_mlx5()
{
	[[ -d /sys/bus/pci/drivers/mlx5_core ]]
}

# Return 0 if an arm-smmu-* platform device exists in sysfs.
has_aarch64_smmu()
{
	ls /sys/devices/platform/arm-smmu-* 1> /dev/null 2>&1
}

# Return 0 if $1 is an optionally signed integer with an optional
# K/M/G/T/P/E suffix (either case), i.e. anything to_bytes can convert.
is_memsize()
{
	[[ "$1" =~ ^[+-]?[0-9]+[KkMmGgTtPpEe]?$ ]]
}

# range defined for crashkernel parameter
# i.e. <start>-[<end>]
is_memrange()
{
	is_memsize "${1%-*}" || return 1
	# an open-ended range has nothing after the dash
	[[ -n ${1#*-} ]] || return 0
	is_memsize "${1#*-}"
}

# Print the memory size $1 converted to bytes; return 1 if $1 is not a
# valid memory size.
to_bytes()
{
	local _s

	_s="$1"

	is_memsize "$_s" || return 1

	case "${_s: -1}" in
	K | k)
		_s=${_s::-1}
		_s="$((_s * 1024))"
		;;
	M | m)
		_s=${_s::-1}
		_s="$((_s * 1024 * 1024))"
		;;
	G | g)
		_s=${_s::-1}
		_s="$((_s * 1024 * 1024 * 1024))"
		;;
	T | t)
		_s=${_s::-1}
		_s="$((_s * 1024 * 1024 * 1024 * 1024))"
		;;
	P | p)
		_s=${_s::-1}
		_s="$((_s * 1024 * 1024 * 1024 * 1024 * 1024))"
		;;
	E | e)
		_s=${_s::-1}
		_s="$((_s * 1024 * 1024 * 1024 * 1024 * 1024 * 1024))"
		;;
	*) ;;
	esac
	echo "$_s"
}

# Print the sum of the memory sizes $1 and $2, expressed with the largest
# unit that keeps the value an integer; return 1 on an invalid size.
memsize_add()
{
	local -a units=("" "K" "M" "G" "T" "P" "E")
	local i a b

	a=$(to_bytes "$1") || return 1
	b=$(to_bytes "$2") || return 1
	i=0

	((a += b))
	while :; do
		[[ $((a / 1024)) -eq 0 ]] && break
		[[ $((a % 1024)) -ne 0 ]] && break
		[[ $((${#units[@]} - 1)) -eq $i ]] && break

		((a /= 1024))
		((i += 1))
	done

	echo "${a}${units[$i]}"
}

# Split a crashkernel parameter into "<size>;<range>;<offset>" records, one
# per line: one "<size>;<range>;" line per entry, followed by a final
# ";;<offset>" line ("@offset", ",high", ",low" or empty).
_crashkernel_parse()
{
	local ck entry
	local range size offset

	ck="$1"

	if [[ "$ck" == *@* ]]; then
		offset="@${ck##*@}"
		ck=${ck%@*}
	elif [[ "$ck" == *,high ]] || [[ "$ck" == *,low ]]; then
		offset=",${ck##*,}"
		ck=${ck%,*}
	else
		offset=''
	fi

	while read -d , -r entry; do
		[[ -n "$entry" ]] || continue
		if [[ "$entry" == *:* ]]; then
			range=${entry%:*}
			size=${entry#*:}
		else
			range=""
			size=${entry}
		fi

		echo "$size;$range;"
	done <<< "$ck,"
	echo ";;$offset"
}

# $1 crashkernel command line parameter
# $2 size to be added
# Prints the crashkernel parameter with $2 added to the size of every entry;
# returns 1 if a range or size is invalid.
_crashkernel_add()
{
	local ck delta ret
	local range size offset

	ck="$1"
	delta="$2"
	ret=""

	while IFS=';' read -r size range offset; do
		# the record carrying the offset is the last one
		if [[ -n "$offset" ]]; then
			ret="${ret%,}$offset"
			break
		fi

		[[ -n "$size" ]] || continue
		if [[ -n "$range" ]]; then
			is_memrange "$range" || return 1
			ret+="$range:"
		fi

		size=$(memsize_add "$size" "$delta") || return 1
		ret+="$size,"
	done < <(_crashkernel_parse "$ck")

	echo "${ret%,}"
}

# get default crashkernel
# $1 dump mode, if not specified, dump_mode will be judged by is_fadump_capable
# $2 kernel-release, if not specified, got by _get_kdump_kernel_version
kdump_get_arch_recommend_crashkernel()
{
	local _arch _ck_cmdline _dump_mode
	local _delta=0

	if [[ -z "$1" ]]; then
		if is_fadump_capable; then
			_dump_mode=fadump
		else
			_dump_mode=kdump
		fi
	else
		_dump_mode=$1
	fi

	_arch=$(uname -m)

	if [[ $_arch == "x86_64" ]] || [[ $_arch == "s390x" ]]; then
		_ck_cmdline="1G-4G:192M,4G-64G:256M,64G-:512M"
		is_sme_or_sev_active && ((_delta += 64))
	elif [[ $_arch == "aarch64" ]]; then
		local _running_kernel

		# Base line for 4K variant kernel. The formula is based on x86 plus extra = 64M
		_ck_cmdline="1G-4G:256M,4G-64G:320M,64G-:576M"
		if [[ -z "$2" ]]; then
			_running_kernel=$(_get_kdump_kernel_version)
		else
			_running_kernel=$2
		fi

		# the naming convention of 64k variant suffixes with +64k, e.g. "vmlinuz-5.14.0-312.el9.aarch64+64k"
		if echo "$_running_kernel" | grep -q 64k; then
			# Without smmu, the diff of MemFree between 4K and 64K measured on a high end aarch64 machine is 82M.
			# Picking up 100M to cover this diff. And finally, we have "1G-4G:356M;4G-64G:420M;64G-:676M"
			((_delta += 100))
			# On a 64K system, the extra 384MB is calculated by: cmdq_num * 16 bytes + evtq_num * 32B + priq_num * 16B
			# While on a 4K system, it is negligible
			has_aarch64_smmu && ((_delta += 384))
			#64k kernel, mlx5 consumes extra 188M memory, and choose 200M
			has_mlx5 && ((_delta += 200))
		else
			#4k kernel, mlx5 consumes extra 124M memory, and choose 150M
			has_mlx5 && ((_delta += 150))
		fi
	elif [[ $_arch == "ppc64le" ]]; then
		if [[ $_dump_mode == "fadump" ]]; then
			_ck_cmdline="4G-16G:768M,16G-64G:1G,64G-128G:2G,128G-1T:4G,1T-2T:6G,2T-4T:12G,4T-8T:20G,8T-16T:36G,16T-32T:64G,32T-64T:128G,64T-:180G"
		else
			_ck_cmdline="2G-4G:384M,4G-16G:512M,16G-64G:1G,64G-128G:2G,128G-:4G"
		fi
	fi

	echo -n "$(_crashkernel_add "$_ck_cmdline" "${_delta}M")"
}

# return recommended size based on current system RAM size
# NOTE(review): no positional parameter is read here; the kernel version is
# whatever kdump_get_arch_recommend_crashkernel picks by default.
# Returns 1, with a message on stderr, if the iomem file is not readable.
kdump_get_arch_recommend_size()
{
	local _ck_cmdline _sys_mem
	# check the same file get_system_size reads
	local _iomem=${PROC_IOMEM:-/proc/iomem}

	if ! [[ -r "$_iomem" ]]; then
		# stdout carries the result, so the diagnostic goes to stderr
		echo "Error, can not access $_iomem." >&2
		return 1
	fi
	_sys_mem=$(get_system_size)
	_ck_cmdline=$(kdump_get_arch_recommend_crashkernel)
	# give open-ended ranges an explicit upper bound for get_recommend_size
	_ck_cmdline=${_ck_cmdline//-:/-102400T:}
	get_recommend_size "$_sys_mem" "$_ck_cmdline"
}

# Print all underlying crypt devices of a block device
# print nothing if device is not on top of a crypt device
# $1: the block device to be checked in maj:min format
get_luks_crypt_dev()
{
	# _x is local so every recursion level keeps its own loop variable
	local _type _x

	[[ -b /dev/block/$1 ]] || return 1

	_type=$(blkid -u filesystem,crypto -o export -- "/dev/block/$1" |
		sed -n -E "s/^TYPE=(.*)$/\1/p")
	[[ $_type == "crypto_LUKS" ]] && echo "$1"

	# walk down to the devices this one is stacked on
	for _x in "/sys/dev/block/$1/slaves/"*; do
		[[ -f $_x/dev ]] || continue
		[[ $_x/subsystem -ef /sys/class/block ]] || continue
		get_luks_crypt_dev "$(< "$_x/dev")"
	done
}

# kdump_get_maj_min <device>
# Prints the major and minor of a device node.
# Example:
# $ kdump_get_maj_min /dev/sda2
# 8:2
# Returns 1 and prints nothing if stat cannot report on $1.
kdump_get_maj_min()
{
	local _majmin

	# stat prints major and minor as hex; convert them to decimal
	_majmin="$(stat -L -c '%t:%T' "$1" 2> /dev/null)"
	# without this guard an empty result would be fed to $((0x))
	[[ $_majmin == *:* ]] || return 1
	printf "%s" "$((0x${_majmin%:*})):$((0x${_majmin#*:}))"
}

# Print the crypt devices (maj:min) found under the block dump target.
get_all_kdump_crypt_dev()
{
	local _dev

	for _dev in $(get_block_dump_target); do
		get_luks_crypt_dev "$(kdump_get_maj_min "$_dev")"
	done
}