Skip to content

Commit

Permalink
[202405][Mellanox] Fix retry logic on discovery of MST device (#20390)
Browse files Browse the repository at this point in the history
* [Mellanox] Fix retry logic on discovery of MST device

* Adding clear semaphore after device check

* Clean up stale code in syncd.sh
  • Loading branch information
dgsudharsan authored Oct 28, 2024
1 parent 378ae58 commit c785950
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 24 deletions.
19 changes: 1 addition & 18 deletions files/scripts/syncd.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,6 @@

. /usr/local/bin/syncd_common.sh

# Sentinel printed by GetMstDevice when no usable MST device is found.
readonly UNKN_MST="unknown"

# Print the MST device path matching /dev/mst/*_pci_cr0.
# Globals:   UNKN_MST (read)
# Arguments: none
# Outputs:   one line on stdout - the device path when exactly one entry
#            matches the pattern and it is a character device; otherwise
#            the value of UNKN_MST.
function GetMstDevice() {
    # Let the shell expand the glob instead of parsing `ls` output (which
    # also used to fold ls's error text into the captured value).
    local -a _MST_DEVICES=(/dev/mst/*_pci_cr0)

    # An unmatched glob stays as the literal pattern and fails the -c test;
    # more than one match is treated as "unknown", as it was before.
    if [[ "${#_MST_DEVICES[@]}" -eq 1 && -c "${_MST_DEVICES[0]}" ]]; then
        echo "${_MST_DEVICES[0]}"
    else
        echo "${UNKN_MST}"
    fi
}

function startplatform() {

# platform specific tasks
Expand All @@ -36,12 +24,7 @@ function startplatform() {
debug "Starting Firmware update procedure"
/usr/bin/mst start --with_i2cdev

local -r _MST_DEVICE="$(GetMstDevice)"
if [[ "${_MST_DEVICE}" != "${UNKN_MST}" ]]; then
/usr/bin/flint -d $_MST_DEVICE --clear_semaphore
fi

/usr/bin/mlnx-fw-upgrade.sh -v
/usr/bin/mlnx-fw-upgrade.sh -c -v
if [[ "$?" -ne "${EXIT_SUCCESS}" ]]; then
debug "Failed to upgrade fw. " "$?" "Restart syncd"
exit 1
Expand Down
31 changes: 25 additions & 6 deletions platform/mellanox/mlnx-fw-upgrade.j2
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ function PrintHelp() {
echo " -u, --upgrade Upgrade ASIC firmware using next boot image (useful after SONiC-To-SONiC update)"
echo " -s, --syslog Use syslog logger (enabled when -u|--upgrade)"
echo " -v, --verbose Verbose mode (enabled when -u|--upgrade)"
echo " -c, --clear-semaphore Clear hw resources before updating firmware"
echo " -h, --help Print help"
echo
echo "Examples:"
Expand All @@ -95,6 +96,9 @@ function ParseArguments() {
-s|--syslog)
SYSLOG_LOGGER="${YES_PARAM}"
;;
-c|--clear-semaphore)
CLEAR_SEMAPHORE="${YES_PARAM}"
;;
-h|--help)
PrintHelp
exit "${EXIT_SUCCESS}"
Expand Down Expand Up @@ -182,16 +186,20 @@ function UnlockStateChange() {
function WaitForDevice() {
local -i QUERY_RETRY_COUNT_MAX="10"
local -i QUERY_RETRY_COUNT="0"
local SPC_MST_DEV
local QUERY_RC=""

local SPC_MST_DEV=$(GetSPCMstDevice)

while [[ ("${QUERY_RETRY_COUNT}" -lt "${QUERY_RETRY_COUNT_MAX}") && ("${SPC_MST_DEV}" == "${UNKN_MST}") ]]; do
while : ; do
SPC_MST_DEV=$(GetSPCMstDevice)
${QUERY_XML} -d ${SPC_MST_DEV} -o ${QUERY_FILE}
QUERY_RC="$?"
[[ ("${QUERY_RETRY_COUNT}" -lt "${QUERY_RETRY_COUNT_MAX}") && ("${QUERY_RC}" != "${EXIT_SUCCESS}") ]] || break
sleep 1s
((QUERY_RETRY_COUNT++))
SPC_MST_DEV=$(GetSPCMstDevice)
LogInfo "Retrying MST device query ${QUERY_RETRY_COUNT}"
done

if [[ "${SPC_MST_DEV}" == "${UNKN_MST}" ]]; then
if [[ "${QUERY_RC}" != "${EXIT_SUCCESS}" ]]; then
# Couldn't Detect the Spectrum ASIC. Exit failure and print the detailed information
output=$(${QUERY_CMD})
failure_msg="${output#*Fail : }"
Expand Down Expand Up @@ -232,7 +240,7 @@ function GetSPCMstDevice() {

if [[ ! -c "${_MST_DEVICE}" ]]; then
echo "${UNKN_MST}"
else
else
echo "${_MST_DEVICE}"
fi

Expand Down Expand Up @@ -394,6 +402,15 @@ function Cleanup() {
fi
}

# Run `flint --clear_semaphore` on the MST device when it was requested.
# Globals:   CLEAR_SEMAPHORE, YES_PARAM, UNKN_MST (read)
# Arguments: none
# Returns:   flint's exit status when it is invoked; 0 when the step is
#            skipped (flag not set, or GetSPCMstDevice reported UNKN_MST).
function ClearSemaphore() {
    # Only act when CLEAR_SEMAPHORE was set to YES_PARAM.
    if [[ "${CLEAR_SEMAPHORE}" != "${YES_PARAM}" ]]; then
        return 0
    fi

    local -r _MST_DEVICE="$(GetSPCMstDevice)"

    # Nothing to clear when no device was detected.
    if [[ "${_MST_DEVICE}" == "${UNKN_MST}" ]]; then
        return 0
    fi

    # Quote the path so it is passed to flint as a single argument.
    /usr/bin/flint -d "${_MST_DEVICE}" --clear_semaphore
}

trap Cleanup EXIT

ParseArguments "$@"
Expand All @@ -404,6 +421,8 @@ LockStateChange

WaitForDevice

ClearSemaphore

if [ "${IMAGE_UPGRADE}" != "${YES_PARAM}" ]; then
UpgradeFW
else
Expand Down

0 comments on commit c785950

Please sign in to comment.