From c785950bf0bc9645179e80fbbc851140d6b3cd92 Mon Sep 17 00:00:00 2001 From: Sudharsan Dhamal Gopalarathnam Date: Mon, 28 Oct 2024 08:37:31 -0700 Subject: [PATCH] [202405][Mellanox] Fix retry logic on discovery of MST device (#20390) * [Mellanox] Fix retry logic on discovery of MST device * Adding clear semaphore after device check * Clean up stale code in syncd.sh --- files/scripts/syncd.sh | 19 +---------------- platform/mellanox/mlnx-fw-upgrade.j2 | 31 ++++++++++++++++++++++------ 2 files changed, 26 insertions(+), 24 deletions(-) diff --git a/files/scripts/syncd.sh b/files/scripts/syncd.sh index 0930de72b87c..0c794ef5d9dc 100755 --- a/files/scripts/syncd.sh +++ b/files/scripts/syncd.sh @@ -2,18 +2,6 @@ . /usr/local/bin/syncd_common.sh -declare -r UNKN_MST="unknown" - -function GetMstDevice() { - local _MST_DEVICE="$(ls /dev/mst/*_pci_cr0 2>&1)" - - if [[ ! -c "${_MST_DEVICE}" ]]; then - echo "${UNKN_MST}" - else - echo "${_MST_DEVICE}" - fi -} - function startplatform() { # platform specific tasks @@ -36,12 +24,7 @@ function startplatform() { debug "Starting Firmware update procedure" /usr/bin/mst start --with_i2cdev - local -r _MST_DEVICE="$(GetMstDevice)" - if [[ "${_MST_DEVICE}" != "${UNKN_MST}" ]]; then - /usr/bin/flint -d $_MST_DEVICE --clear_semaphore - fi - - /usr/bin/mlnx-fw-upgrade.sh -v + /usr/bin/mlnx-fw-upgrade.sh -c -v if [[ "$?" -ne "${EXIT_SUCCESS}" ]]; then debug "Failed to upgrade fw. " "$?" "Restart syncd" exit 1 diff --git a/platform/mellanox/mlnx-fw-upgrade.j2 b/platform/mellanox/mlnx-fw-upgrade.j2 index e4c567c1b76e..466249c85986 100755 --- a/platform/mellanox/mlnx-fw-upgrade.j2 +++ b/platform/mellanox/mlnx-fw-upgrade.j2 @@ -72,6 +72,7 @@ function PrintHelp() { echo " -u, --upgrade Upgrade ASIC firmware using next boot image (useful after SONiC-To-SONiC update)" echo " -s, --syslog Use syslog logger (enabled when -u|--upgrade)" echo " -v, --verbose Verbose mode (enabled when -u|--upgrade)" + echo " -c, --clear-semaphore Clear hw resources before updating firmware" echo " -h, --help Print help" echo echo "Examples:" @@ -95,6 +96,9 @@ function ParseArguments() { -s|--syslog) SYSLOG_LOGGER="${YES_PARAM}" ;; + -c|--clear-semaphore) + CLEAR_SEMAPHORE="${YES_PARAM}" + ;; -h|--help) PrintHelp exit "${EXIT_SUCCESS}" @@ -182,16 +186,20 @@ function UnlockStateChange() { function WaitForDevice() { local -i QUERY_RETRY_COUNT_MAX="10" local -i QUERY_RETRY_COUNT="0" + local SPC_MST_DEV + local QUERY_RC="" - local SPC_MST_DEV=$(GetSPCMstDevice) - - while [[ ("${QUERY_RETRY_COUNT}" -lt "${QUERY_RETRY_COUNT_MAX}") && ("${SPC_MST_DEV}" == "${UNKN_MST}") ]]; do + while : ; do + SPC_MST_DEV=$(GetSPCMstDevice) + ${QUERY_XML} -d ${SPC_MST_DEV} -o ${QUERY_FILE} + QUERY_RC="$?" + [[ ("${QUERY_RETRY_COUNT}" -lt "${QUERY_RETRY_COUNT_MAX}") && ("${QUERY_RC}" != "${EXIT_SUCCESS}") ]] || break sleep 1s ((QUERY_RETRY_COUNT++)) - SPC_MST_DEV=$(GetSPCMstDevice) + LogInfo "Retrying MST device query ${QUERY_RETRY_COUNT}" done - if [[ "${SPC_MST_DEV}" == "${UNKN_MST}" ]]; then + if [[ "${QUERY_RC}" != "${EXIT_SUCCESS}" ]]; then # Couldn't Detect the Spectrum ASIC. Exit failure and print the detailed information output=$(${QUERY_CMD}) failure_msg="${output#*Fail : }" @@ -232,7 +240,7 @@ function GetSPCMstDevice() { if [[ ! -c "${_MST_DEVICE}" ]]; then echo "${UNKN_MST}" - else + else echo "${_MST_DEVICE}" fi @@ -394,6 +402,15 @@ function Cleanup() { fi } +function ClearSemaphore() { + if [[ "${CLEAR_SEMAPHORE}" == "${YES_PARAM}" ]]; then + local -r _MST_DEVICE="$(GetSPCMstDevice)" + if [[ "${_MST_DEVICE}" != "${UNKN_MST}" ]]; then + /usr/bin/flint -d $_MST_DEVICE --clear_semaphore + fi + fi +} + trap Cleanup EXIT ParseArguments "$@" @@ -404,6 +421,8 @@ LockStateChange WaitForDevice +ClearSemaphore + if [ "${IMAGE_UPGRADE}" != "${YES_PARAM}" ]; then UpgradeFW else