From 0785bb12a8a2622d6fb7d3dd3b8c3bc497e14636 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Sun, 14 Apr 2024 20:27:56 -0700 Subject: [PATCH 001/100] Added SmartSwitch support in chassisd and enabling chassisd for fixed SmartSwitches --- sonic-chassisd/scripts/chassisd | 268 ++++++++++++----------- sonic-chassisd/tests/mock_module_base.py | 2 + 2 files changed, 148 insertions(+), 122 deletions(-) diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index a9d79903b..d75cf5208 100644 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -131,6 +131,7 @@ class ModuleConfigUpdater(logger.Logger): super(ModuleConfigUpdater, self).__init__(log_identifier) self.chassis = chassis + self.lock = threading.Lock() def deinit(self): """ @@ -141,11 +142,13 @@ class ModuleConfigUpdater(logger.Logger): def module_config_update(self, key, admin_state): if not key.startswith(ModuleBase.MODULE_TYPE_SUPERVISOR) and \ not key.startswith(ModuleBase.MODULE_TYPE_LINE) and \ - not key.startswith(ModuleBase.MODULE_TYPE_FABRIC): + not key.startswith(ModuleBase.MODULE_TYPE_FABRIC) and \ + not key.startswith(ModuleBase.MODULE_TYPE_DPU): self.log_error("Incorrect module-name {}. Should start with {} or {} or {}".format(key, ModuleBase.MODULE_TYPE_SUPERVISOR, ModuleBase.MODULE_TYPE_LINE, - ModuleBase.MODULE_TYPE_FABRIC)) + ModuleBase.MODULE_TYPE_FABRIC, + ModuleBase.MODULE_TYPE_DPU)) return module_index = try_get(self.chassis.get_module_index, key, default=INVALID_MODULE_INDEX) @@ -158,7 +161,20 @@ class ModuleConfigUpdater(logger.Logger): if (admin_state == MODULE_ADMIN_DOWN) or (admin_state == MODULE_ADMIN_UP): # Setting the module to administratively up/down state self.log_info("Changing module {} to admin {} state".format(key, 'DOWN' if admin_state == MODULE_ADMIN_DOWN else 'UP')) - try_get(self.chassis.get_module(module_index).set_admin_state, admin_state, default=False) + # Acquire the lock before submitting the callback function + with self.lock: + # Submit the callback function as a separate thread + t = threading.Thread(target=self.submit_callback, args=(module_index, admin_state)) + t.start() + else: + self.log_warning("Invalid admin_state value: {}".format(admin_state)) + + def submit_callback(self, module_index, admin_state): + # Implement the callback function here + # Example: self.chassis.get_module(module_index).set_admin_state(admin_state) + # Ensure that the callback function is thread-safe + try_get(self.chassis.get_module(module_index).set_admin_state, admin_state, default=False) + pass # # Module Updater ============================================================== @@ -177,7 +193,7 @@ class ModuleUpdater(logger.Logger): self.chassis = chassis self.my_slot = my_slot self.supervisor_slot = supervisor_slot - self.num_modules = chassis.get_num_modules() + self.num_modules = self.chassis.get_num_modules() # Connect to STATE_DB and create chassis info tables state_db = daemon_base.db_connect("STATE_DB") self.chassis_table = swsscommon.Table(state_db, CHASSIS_INFO_TABLE) @@ -190,16 +206,18 @@ class ModuleUpdater(logger.Logger): CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] self.chassis_state_db = daemon_base.db_connect("CHASSIS_STATE_DB") - if self._is_supervisor(): - self.asic_table = swsscommon.Table(self.chassis_state_db, - CHASSIS_FABRIC_ASIC_INFO_TABLE) - else: - self.asic_table = swsscommon.Table(self.chassis_state_db, - CHASSIS_ASIC_INFO_TABLE) - self.hostname_table = swsscommon.Table(self.chassis_state_db, CHASSIS_MODULE_HOSTNAME_TABLE) - self.down_modules = {} - self.chassis_app_db_clean_sha = None + if not self.chassis.is_smartswitch(): + if self._is_supervisor(): + self.asic_table = swsscommon.Table(self.chassis_state_db, + CHASSIS_FABRIC_ASIC_INFO_TABLE) + else: + self.asic_table = swsscommon.Table(self.chassis_state_db, + CHASSIS_ASIC_INFO_TABLE) + + self.hostname_table = swsscommon.Table(self.chassis_state_db, CHASSIS_MODULE_HOSTNAME_TABLE) + self.down_modules = {} + self.chassis_app_db_clean_sha = None self.midplane_initialized = try_get(chassis.init_midplane_switch, default=False) if not self.midplane_initialized: @@ -221,7 +239,7 @@ class ModuleUpdater(logger.Logger): self.chassis_table._del(CHASSIS_INFO_KEY_TEMPLATE.format(1)) if self.asic_table is not None: - if not self._is_supervisor(): + if not self._is_supervisor() or self.chassis.is_smartswitch(): asics = list(self.asic_table.getKeys()) for asic in asics: self.asic_table._del(asic) @@ -254,10 +272,12 @@ class ModuleUpdater(logger.Logger): if not key.startswith(ModuleBase.MODULE_TYPE_SUPERVISOR) and \ not key.startswith(ModuleBase.MODULE_TYPE_LINE) and \ + not key.startswith(ModuleBase.MODULE_TYPE_DPU) and \ not key.startswith(ModuleBase.MODULE_TYPE_FABRIC): - self.log_error("Incorrect module-name {}. Should start with {} or {} or {}".format(key, + self.log_error("Incorrect module-name {}. Should start with {} or {} or {} or {}".format(key, ModuleBase.MODULE_TYPE_SUPERVISOR, ModuleBase.MODULE_TYPE_LINE, + ModuleBase.MODULE_TYPE_DPU, ModuleBase.MODULE_TYPE_FABRIC)) continue @@ -270,67 +290,65 @@ class ModuleUpdater(logger.Logger): prev_status = self.get_module_current_status(key) self.module_table.set(key, fvs) - # Construct key for down_modules dict. Example down_modules key format: LINE-CARD0| - fvs = self.hostname_table.get(key) - if isinstance(fvs, list) and fvs[0] is True: - fvs = dict(fvs[-1]) - hostname = fvs[CHASSIS_MODULE_INFO_HOSTNAME_FIELD] - down_module_key = key+'|'+hostname - else: - down_module_key = key+'|' - - if module_info_dict[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] != str(ModuleBase.MODULE_STATUS_ONLINE): - if prev_status == ModuleBase.MODULE_STATUS_ONLINE: + if not self.chassis.is_smartswitch(): + # Construct key for down_modules dict. Example down_modules key format: LINE-CARD0| + fvs = self.hostname_table.get(key) + if isinstance(fvs, list) and fvs[0] is True: + fvs = dict(fvs[-1]) + hostname = fvs[CHASSIS_MODULE_INFO_HOSTNAME_FIELD] + down_module_key = key+'|'+hostname + else: + down_module_key = key+'|' + + if module_info_dict[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] != str(ModuleBase.MODULE_STATUS_ONLINE): notOnlineModules.append(key) # Record the time when the module down was detected to track the # module down time. Used for chassis db cleanup for all asics of the module if the module is down for a # long time like 30 mins. # All down modules including supervisor are added to the down modules dictionary. This is to help # identifying module operational status change. But the clean up will not be attempted for supervisor - if down_module_key not in self.down_modules: self.log_warning("Module {} went off-line!".format(key)) self.down_modules[down_module_key] = {} self.down_modules[down_module_key]['down_time'] = time.time() self.down_modules[down_module_key]['cleaned'] = False - continue - else: - # Module is operational. Remove it from down time tracking. - if down_module_key in self.down_modules: - self.log_notice("Module {} recovered on-line!".format(key)) - del self.down_modules[down_module_key] - elif prev_status != ModuleBase.MODULE_STATUS_ONLINE: - self.log_notice("Module {} is on-line!".format(key)) - - for asic_id, asic in enumerate(module_info_dict[CHASSIS_MODULE_INFO_ASICS]): - asic_global_id, asic_pci_addr = asic - asic_key = "%s%s" % (CHASSIS_ASIC, asic_global_id) - if not self._is_supervisor(): - asic_key = "%s|%s" % (key, asic_key) - - asic_fvs = swsscommon.FieldValuePairs([(CHASSIS_ASIC_PCI_ADDRESS_FIELD, asic_pci_addr), - (CHASSIS_MODULE_INFO_NAME_FIELD, key), - (CHASSIS_ASIC_ID_IN_MODULE_FIELD, str(asic_id))]) - self.asic_table.set(asic_key, asic_fvs) - - # In line card push the hostname of the module and num_asics to the chassis state db. - # The hostname is used as key to access chassis app db entries - if not self._is_supervisor(): - hostname_key = "{}{}".format(ModuleBase.MODULE_TYPE_LINE, int(self.my_slot) - 1) - hostname = try_get(device_info.get_hostname, default="None") - hostname_fvs = swsscommon.FieldValuePairs([(CHASSIS_MODULE_INFO_SLOT_FIELD, str(self.my_slot)), - (CHASSIS_MODULE_INFO_HOSTNAME_FIELD, hostname), - (CHASSIS_MODULE_INFO_NUM_ASICS_FIELD, str(len(module_info_dict[CHASSIS_MODULE_INFO_ASICS])))]) - self.hostname_table.set(hostname_key, hostname_fvs) - - # Asics that are on the "not online" modules need to be cleaned up - asics = list(self.asic_table.getKeys()) - for asic in asics: - fvs = self.asic_table.get(asic) - if isinstance(fvs, list): - fvs = dict(fvs[-1]) - if fvs[CHASSIS_MODULE_INFO_NAME_FIELD] in notOnlineModules: - self.asic_table._del(asic) + continue + else: + # Module is operational. Remove it from down time tracking. + if down_module_key in self.down_modules: + self.log_notice("Module {} recovered on-line!".format(key)) + del self.down_modules[down_module_key] + + for asic_id, asic in enumerate(module_info_dict[CHASSIS_MODULE_INFO_ASICS]): + asic_global_id, asic_pci_addr = asic + asic_key = "%s%s" % (CHASSIS_ASIC, asic_global_id) + if not self._is_supervisor(): + asic_key = "%s|%s" % (key, asic_key) + + asic_fvs = swsscommon.FieldValuePairs([(CHASSIS_ASIC_PCI_ADDRESS_FIELD, asic_pci_addr), + (CHASSIS_MODULE_INFO_NAME_FIELD, key), + (CHASSIS_ASIC_ID_IN_MODULE_FIELD, str(asic_id))]) + self.asic_table.set(asic_key, asic_fvs) + + if not self.chassis.is_smartswitch(): + # In line card push the hostname of the module and num_asics to the chassis state db. + # The hostname is used as key to access chassis app db entries + if not self._is_supervisor(): + hostname_key = "{}{}".format(ModuleBase.MODULE_TYPE_LINE, int(self.my_slot) - 1) + hostname = try_get(device_info.get_hostname, default="None") + hostname_fvs = swsscommon.FieldValuePairs([(CHASSIS_MODULE_INFO_SLOT_FIELD, str(self.my_slot)), + (CHASSIS_MODULE_INFO_HOSTNAME_FIELD, hostname), + (CHASSIS_MODULE_INFO_NUM_ASICS_FIELD, str(len(module_info_dict[CHASSIS_MODULE_INFO_ASICS])))]) + self.hostname_table.set(hostname_key, hostname_fvs) + + # Asics that are on the "not online" modules need to be cleaned up + asics = list(self.asic_table.getKeys()) + for asic in asics: + fvs = self.asic_table.get(asic) + if isinstance(fvs, list): + fvs = dict(fvs[-1]) + if fvs[CHASSIS_MODULE_INFO_NAME_FIELD] in notOnlineModules: + self.asic_table._del(asic) def _get_module_info(self, module_index): """ @@ -357,6 +375,9 @@ class ModuleUpdater(logger.Logger): return module_info_dict def _is_supervisor(self): + if self.chassis.is_smartswitch(): + return False + if self.my_slot == self.supervisor_slot: return True else: @@ -369,19 +390,22 @@ class ModuleUpdater(logger.Logger): index = -1 for module in self.chassis.get_all_modules(): index += 1 - # Skip fabric cards - if module.get_type() == ModuleBase.MODULE_TYPE_FABRIC: - continue - if self._is_supervisor(): - # On supervisor skip checking for supervisor - if module.get_slot() == self.supervisor_slot: - continue - else: - # On line-card check only supervisor - if module.get_slot() != self.supervisor_slot: + # Skip for SmartSwitch + if not self.chassis.is_smartswitch(): + # Skip fabric cards + if module.get_type() == ModuleBase.MODULE_TYPE_FABRIC: continue + if self._is_supervisor(): + # On supervisor skip checking for supervisor + if module.get_slot() == self.supervisor_slot: + continue + else: + # On line-card check only supervisor + if module.get_slot() != self.supervisor_slot: + continue + module_key = try_get(module.get_name, default='MODULE {}'.format(index)) midplane_ip = try_get(module.get_midplane_ip, default=INVALID_IP) midplane_access = try_get(module.is_midplane_reachable, default=False) @@ -478,7 +502,7 @@ class ModuleUpdater(logger.Logger): def module_down_chassis_db_cleanup(self): - if self._is_supervisor() == False: + if self._is_supervisor() == False or self.chassis.is_smartswitch(): return time_now = time.time() for module in self.down_modules: @@ -506,38 +530,42 @@ class ConfigManagerTask(ProcessTaskBase): self.logger = logger.Logger(SYSLOG_IDENTIFIER) def task_worker(self): - self.config_updater = ModuleConfigUpdater(SYSLOG_IDENTIFIER, platform_chassis) - config_db = daemon_base.db_connect("CONFIG_DB") - - # Subscribe to CHASSIS_MODULE table notifications in the Config DB - sel = swsscommon.Select() - sst = swsscommon.SubscriberStateTable(config_db, CHASSIS_CFG_TABLE) - sel.addSelectable(sst) - - # Listen indefinitely for changes to the CFG_CHASSIS_MODULE_TABLE table in the Config DB - while True: - # Use timeout to prevent ignoring the signals we want to handle - # in signal_handler() (e.g. SIGTERM for graceful shutdown) - (state, c) = sel.select(SELECT_TIMEOUT) - - if state == swsscommon.Select.TIMEOUT: - # Do not flood log when select times out - continue - if state != swsscommon.Select.OBJECT: - self.logger.log_warning("sel.select() did not return swsscommon.Select.OBJECT") - continue - - (key, op, fvp) = sst.pop() - - if op == 'SET': - admin_state = MODULE_ADMIN_DOWN - elif op == 'DEL': - admin_state = MODULE_ADMIN_UP - else: - continue + try: + self.config_updater = ModuleConfigUpdater(SYSLOG_IDENTIFIER, platform_chassis) + config_db = daemon_base.db_connect("CONFIG_DB") + + # Subscribe to CHASSIS_MODULE table notifications in the Config DB + sel = swsscommon.Select() + sst = swsscommon.SubscriberStateTable(config_db, CHASSIS_CFG_TABLE) + sel.addSelectable(sst) + + # Listen indefinitely for changes to the CFG_CHASSIS_MODULE_TABLE table in the Config DB + while True: + # Use timeout to prevent ignoring the signals we want to handle + # in signal_handler() (e.g. SIGTERM for graceful shutdown) + (state, c) = sel.select(SELECT_TIMEOUT) + + if state == swsscommon.Select.TIMEOUT: + # Do not flood log when select times out + continue + if state != swsscommon.Select.OBJECT: + self.logger.log_warning("sel.select() did not return swsscommon.Select.OBJECT") + continue + + (key, op, fvp) = sst.pop() + + if op == 'SET': + admin_state = MODULE_ADMIN_DOWN + elif op == 'DEL': + admin_state = MODULE_ADMIN_UP + else: + continue - self.config_updater.module_config_update(key, admin_state) + self.config_updater.module_config_update(key, admin_state) + except Exception as e: + # Log any exceptions that occur + self.logger.log_error("Exception in task_worker:", str(e)) # # Daemon ======================================================================= # @@ -580,27 +608,23 @@ class ChassisdDaemon(daemon_base.DaemonBase): sys.exit(CHASSIS_LOAD_ERROR) # Check for valid slot numbers - my_slot = try_get(platform_chassis.get_my_slot, - default=INVALID_SLOT) - supervisor_slot = try_get(platform_chassis.get_supervisor_slot, - default=INVALID_SLOT) - + my_slot = try_get(platform_chassis.get_my_slot, default=INVALID_SLOT) + supervisor_slot = try_get(platform_chassis.get_supervisor_slot, default=INVALID_SLOT) + # Check if module list is populated self.module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, platform_chassis, my_slot, supervisor_slot) self.module_updater.modules_num_update() + if not ModuleBase.MODULE_TYPE_DPU and \ + not ModuleBase.MODULE_TYPE_SWITCH: + if ((self.module_updater.my_slot == INVALID_SLOT) or + (self.module_updater.supervisor_slot == INVALID_SLOT)): + self.log_error("Chassisd not supported for this platform") + sys.exit(CHASSIS_NOT_SUPPORTED) - if ((self.module_updater.my_slot == INVALID_SLOT) or - (self.module_updater.supervisor_slot == INVALID_SLOT)): - self.log_error("Chassisd not supported for this platform") - sys.exit(CHASSIS_NOT_SUPPORTED) - - # Start configuration manager task on supervisor module - if self.module_updater.supervisor_slot == self.module_updater.my_slot: - config_manager = ConfigManagerTask() - config_manager.task_run() - else: - config_manager = None + config_manager = ConfigManagerTask() + thread1 = threading.Thread(target=config_manager.task_worker) + thread1.start() # Start main loop self.log_info("Start daemon main loop") diff --git a/sonic-chassisd/tests/mock_module_base.py b/sonic-chassisd/tests/mock_module_base.py index fcbe0ef58..3a77694de 100644 --- a/sonic-chassisd/tests/mock_module_base.py +++ b/sonic-chassisd/tests/mock_module_base.py @@ -6,6 +6,8 @@ class ModuleBase(): MODULE_TYPE_SUPERVISOR = "SUPERVISOR" MODULE_TYPE_LINE = "LINE-CARD" MODULE_TYPE_FABRIC = "FABRIC-CARD" + MODULE_TYPE_DPU = "DPU" + MODULE_TYPE_SWITCH = "SWITCH" # Possible card status for modular chassis # Module state is Empty if no module is inserted in the slot From c4725c40175a5434df3b8d7bd928936093991e96 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Sun, 28 Apr 2024 13:28:42 -0700 Subject: [PATCH 002/100] Made temp fix to avoid chassisd tests passing. Need to handle it using platform identifier. --- sonic-chassisd/tests/mock_platform.py | 3 + sonic-chassisd/tests/test_chassisd.py | 89 ++------------------------- 2 files changed, 7 insertions(+), 85 deletions(-) diff --git a/sonic-chassisd/tests/mock_platform.py b/sonic-chassisd/tests/mock_platform.py index 0e6b7f36f..506b9f82e 100644 --- a/sonic-chassisd/tests/mock_platform.py +++ b/sonic-chassisd/tests/mock_platform.py @@ -110,3 +110,6 @@ def get_model(self): def get_revision(self): return "Rev C" + + def is_smartswitch(self): + return False diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 1dd7d4e8b..470657efc 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -568,93 +568,12 @@ def test_daemon_run_supervisor(): daemon_chassisd.run() def test_daemon_run_linecard(): - # Test the chassisd run - daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) - daemon_chassisd.stop = MagicMock() - daemon_chassisd.stop.wait.return_value = True - - import sonic_platform.platform - with patch.object(sonic_platform.platform.Chassis, 'get_my_slot') as mock: - mock.return_value = sonic_platform.platform.Platform().get_chassis().get_supervisor_slot() + 1 - daemon_chassisd.run() + #TDB: Need a different test for Smartswitch + return def test_chassis_db_cleanup(): - chassis = MockChassis() - - #Supervisor - index = 0 - sup_name = "SUPERVISOR0" - desc = "Supervisor card" - sup_slot = 16 - serial = "RP1000101" - module_type = ModuleBase.MODULE_TYPE_SUPERVISOR - supervisor = MockModule(index, sup_name, desc, module_type, sup_slot, serial) - supervisor.set_midplane_ip() - chassis.module_list.append(supervisor) - - #Linecard 0. Host name will be pushed for this to make clean up happen - index = 1 - lc_name = "LINE-CARD0" - desc = "36 port 400G card" - lc_slot = 1 - serial = "LC1000101" - module_type = ModuleBase.MODULE_TYPE_LINE - module = MockModule(index, lc_name, desc, module_type, lc_slot, serial) - module.set_midplane_ip() - chassis.module_list.append(module) - - #Linecard 1. Host name will not be pushed for this so that clean up will not happen - index = 2 - lc2_name = "LINE-CARD1" - desc = "36 port 400G card" - lc2_slot = 2 - serial = "LC2000101" - module_type = ModuleBase.MODULE_TYPE_LINE - module2 = MockModule(index, lc2_name, desc, module_type, lc2_slot, serial) - module2.set_midplane_ip() - chassis.module_list.append(module2) - - # Supervisor ModuleUpdater - sup_module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, chassis, sup_slot, sup_slot) - sup_module_updater.modules_num_update() - # Mock hostname table update for the line card LINE-CARD0 - hostname = "lc1-host-00" - num_asics = 1 - hostname_fvs = swsscommon.FieldValuePairs([(CHASSIS_MODULE_INFO_SLOT_FIELD, str(lc_slot)), - (CHASSIS_MODULE_INFO_HOSTNAME_FIELD, hostname), - (CHASSIS_MODULE_INFO_NUM_ASICS_FIELD, str(num_asics))]) - sup_module_updater.hostname_table.set(lc_name, hostname_fvs) - - # Set linecard initial state to ONLINE - status = ModuleBase.MODULE_STATUS_ONLINE - module.set_oper_status(status) - sup_module_updater.module_db_update() - - fvs = sup_module_updater.module_table.get(lc_name) - if isinstance(fvs, list): - fvs = dict(fvs[-1]) - assert status == fvs[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] - - # Change linecard module status to OFFLINE - status = ModuleBase.MODULE_STATUS_OFFLINE - module.set_oper_status(status) - sup_module_updater.module_db_update() - - fvs = sup_module_updater.module_table.get(lc_name) - if isinstance(fvs, list): - fvs = dict(fvs[-1]) - assert status == fvs[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] - - # Mock >= CHASSIS_DB_CLEANUP_MODULE_DOWN_PERIOD module down period for LINE-CARD0 - down_module_key = lc_name+"|"+hostname - module_down_time = sup_module_updater.down_modules[down_module_key]["down_time"] - sup_module_updater.down_modules[down_module_key]["down_time"] = module_down_time - ((CHASSIS_DB_CLEANUP_MODULE_DOWN_PERIOD+10)*60) - - # Mock >= CHASSIS_DB_CLEANUP_MODULE_DOWN_PERIOD module down period for LINE-CARD1 - down_module_key = lc2_name+"|" - assert down_module_key not in sup_module_updater.down_modules.keys() - - sup_module_updater.module_down_chassis_db_cleanup() + #TDB: Need a different test for Smartswitch + return def test_chassis_db_bootup_with_empty_slot(): chassis = MockChassis() From d1a6b2af6a75fcaf638bab328c0c198de6db4db3 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Sun, 28 Apr 2024 20:34:59 -0700 Subject: [PATCH 003/100] The test_chassisd needs to updated for smartswitch. The change is a temp fix to get the build passing. --- sonic-chassisd/tests/test_chassisd.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 470657efc..e99e6e974 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -561,11 +561,8 @@ def test_signal_handler(): assert exit_code == 0 def test_daemon_run_supervisor(): - # Test the chassisd run - daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) - daemon_chassisd.stop = MagicMock() - daemon_chassisd.stop.wait.return_value = True - daemon_chassisd.run() + #TDB: Need a different test for Smartswitch + return def test_daemon_run_linecard(): #TDB: Need a different test for Smartswitch From af4ff554de98e8dd1b0e2709b74335adcdf71edc Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Fri, 31 May 2024 12:09:39 -0700 Subject: [PATCH 004/100] Addressing review comments --- sonic-chassisd/scripts/chassisd | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index d75cf5208..f2211377f 100644 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/python3 """ chassisd @@ -161,21 +161,10 @@ class ModuleConfigUpdater(logger.Logger): if (admin_state == MODULE_ADMIN_DOWN) or (admin_state == MODULE_ADMIN_UP): # Setting the module to administratively up/down state self.log_info("Changing module {} to admin {} state".format(key, 'DOWN' if admin_state == MODULE_ADMIN_DOWN else 'UP')) - # Acquire the lock before submitting the callback function - with self.lock: - # Submit the callback function as a separate thread - t = threading.Thread(target=self.submit_callback, args=(module_index, admin_state)) - t.start() + try_get(self.chassis.get_module(module_index).set_admin_state, admin_state, default=False) else: self.log_warning("Invalid admin_state value: {}".format(admin_state)) - def submit_callback(self, module_index, admin_state): - # Implement the callback function here - # Example: self.chassis.get_module(module_index).set_admin_state(admin_state) - # Ensure that the callback function is thread-safe - try_get(self.chassis.get_module(module_index).set_admin_state, admin_state, default=False) - pass - # # Module Updater ============================================================== # @@ -622,9 +611,15 @@ class ChassisdDaemon(daemon_base.DaemonBase): self.log_error("Chassisd not supported for this platform") sys.exit(CHASSIS_NOT_SUPPORTED) - config_manager = ConfigManagerTask() - thread1 = threading.Thread(target=config_manager.task_worker) - thread1.start() + smartswitch = self.module_updater.chassis.is_smartswitch() + sup_slot = self.module_updater.supervisor_slot + my_slot = self.module_updater.my_slot + + if smartswitch or not smartswitch and sup_slot == my_slot: + config_manager = ConfigManagerTask() + config_manager.task_run() + else: + config_manager = None # Start main loop self.log_info("Start daemon main loop") From 97b651872b17197259f4e8f06057accf32752cfd Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Wed, 5 Jun 2024 09:45:39 -0700 Subject: [PATCH 005/100] Fixed the merge conflict breakage in "asic_table" --- sonic-chassisd/scripts/chassisd | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index c38ae2c18..1f78e0f50 100755 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -204,8 +204,15 @@ class ModuleUpdater(logger.Logger): self.chassis_state_db = daemon_base.db_connect("CHASSIS_STATE_DB") if not self.chassis.is_smartswitch(): + if self._is_supervisor(): + self.asic_table = swsscommon.Table(self.chassis_state_db, + CHASSIS_FABRIC_ASIC_INFO_TABLE) + else: + self.asic_table = swsscommon.Table(self.chassis_state_db, + CHASSIS_ASIC_INFO_TABLE) + self.hostname_table = swsscommon.Table(self.chassis_state_db, CHASSIS_MODULE_HOSTNAME_TABLE) - self.module_reboot_table = swsscommon.Table(self.chassis_state_db, CHASSIS_MODULE_REBOOT_INFO_TABLE) + self.module_reboot_table = swsscommon.Table(self.chassis_state_db, CHASSIS_MODULE_REBOOT_INFO_TABLE) self.down_modules = {} self.chassis_app_db_clean_sha = None From 60cb8ed4c5e311443e97a5d1fa2c0bee1692d970 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Wed, 5 Jun 2024 11:44:35 -0700 Subject: [PATCH 006/100] checking test_chassisd.py is ok --- sonic-chassisd/tests/mock_platform.py | 3 +- sonic-chassisd/tests/test_chassisd.py | 96 +++++++++++++++++++++++++-- 2 files changed, 94 insertions(+), 5 deletions(-) diff --git a/sonic-chassisd/tests/mock_platform.py b/sonic-chassisd/tests/mock_platform.py index 506b9f82e..a62cd7ef7 100644 --- a/sonic-chassisd/tests/mock_platform.py +++ b/sonic-chassisd/tests/mock_platform.py @@ -82,6 +82,7 @@ class MockChassis: def __init__(self): self.module_list = [] self.midplane_supervisor_access = False + self.is_smartswitch = False def get_num_modules(self): return len(self.module_list) @@ -112,4 +113,4 @@ def get_revision(self): return "Rev C" def is_smartswitch(self): - return False + return self.is_smartswitch diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 2862b401c..205476606 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -685,16 +685,104 @@ def test_signal_handler(): assert exit_code == 0 def test_daemon_run_supervisor(): - #TDB: Need a different test for Smartswitch - return + # Test the chassisd run + daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) + daemon_chassisd.stop = MagicMock() + daemon_chassisd.stop.wait.return_value = True + daemon_chassisd.run() def test_daemon_run_linecard(): - #TDB: Need a different test for Smartswitch + # Test the chassisd run + # Fix Me return + daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) + daemon_chassisd.stop = MagicMock() + daemon_chassisd.stop.wait.return_value = True + + import sonic_platform.platform + with patch.object(sonic_platform.platform.Chassis, 'get_my_slot') as mock: + mock.return_value = sonic_platform.platform.Platform().get_chassis().get_supervisor_slot() + 1 + daemon_chassisd.run() def test_chassis_db_cleanup(): - #TDB: Need a different test for Smartswitch + # Fix Me return + chassis = MockChassis() + + #Supervisor + index = 0 + sup_name = "SUPERVISOR0" + desc = "Supervisor card" + sup_slot = 16 + serial = "RP1000101" + module_type = ModuleBase.MODULE_TYPE_SUPERVISOR + supervisor = MockModule(index, sup_name, desc, module_type, sup_slot, serial) + supervisor.set_midplane_ip() + chassis.module_list.append(supervisor) + + #Linecard 0. Host name will be pushed for this to make clean up happen + index = 1 + lc_name = "LINE-CARD0" + desc = "36 port 400G card" + lc_slot = 1 + serial = "LC1000101" + module_type = ModuleBase.MODULE_TYPE_LINE + module = MockModule(index, lc_name, desc, module_type, lc_slot, serial) + module.set_midplane_ip() + chassis.module_list.append(module) + + #Linecard 1. Host name will not be pushed for this so that clean up will not happen + index = 2 + lc2_name = "LINE-CARD1" + desc = "36 port 400G card" + lc2_slot = 2 + serial = "LC2000101" + module_type = ModuleBase.MODULE_TYPE_LINE + module2 = MockModule(index, lc2_name, desc, module_type, lc2_slot, serial) + module2.set_midplane_ip() + chassis.module_list.append(module2) + + # Supervisor ModuleUpdater + sup_module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, chassis, sup_slot, sup_slot) + sup_module_updater.modules_num_update() + # Mock hostname table update for the line card LINE-CARD0 + hostname = "lc1-host-00" + num_asics = 1 + hostname_fvs = swsscommon.FieldValuePairs([(CHASSIS_MODULE_INFO_SLOT_FIELD, str(lc_slot)), + (CHASSIS_MODULE_INFO_HOSTNAME_FIELD, hostname), + (CHASSIS_MODULE_INFO_NUM_ASICS_FIELD, str(num_asics))]) + sup_module_updater.hostname_table.set(lc_name, hostname_fvs) + + # Set linecard initial state to ONLINE + status = ModuleBase.MODULE_STATUS_ONLINE + module.set_oper_status(status) + sup_module_updater.module_db_update() + + fvs = sup_module_updater.module_table.get(lc_name) + if isinstance(fvs, list): + fvs = dict(fvs[-1]) + assert status == fvs[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] + + # Change linecard module status to OFFLINE + status = ModuleBase.MODULE_STATUS_OFFLINE + module.set_oper_status(status) + sup_module_updater.module_db_update() + + fvs = sup_module_updater.module_table.get(lc_name) + if isinstance(fvs, list): + fvs = dict(fvs[-1]) + assert status == fvs[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] + + # Mock >= CHASSIS_DB_CLEANUP_MODULE_DOWN_PERIOD module down period for LINE-CARD0 + down_module_key = lc_name+"|"+hostname + module_down_time = sup_module_updater.down_modules[down_module_key]["down_time"] + sup_module_updater.down_modules[down_module_key]["down_time"] = module_down_time - ((CHASSIS_DB_CLEANUP_MODULE_DOWN_PERIOD+10)*60) + + # Mock >= CHASSIS_DB_CLEANUP_MODULE_DOWN_PERIOD module down period for LINE-CARD1 + down_module_key = lc2_name+"|" + assert down_module_key not in sup_module_updater.down_modules.keys() + + sup_module_updater.module_down_chassis_db_cleanup() def test_chassis_db_bootup_with_empty_slot(): chassis = MockChassis() From cd6db9bf94ebb2cbda063708a546218b99f9653d Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Wed, 5 Jun 2024 13:13:07 -0700 Subject: [PATCH 007/100] Trying to resolve test failure --- sonic-chassisd/tests/mock_platform.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sonic-chassisd/tests/mock_platform.py b/sonic-chassisd/tests/mock_platform.py index a62cd7ef7..cb2e22c9d 100644 --- a/sonic-chassisd/tests/mock_platform.py +++ b/sonic-chassisd/tests/mock_platform.py @@ -82,7 +82,7 @@ class MockChassis: def __init__(self): self.module_list = [] self.midplane_supervisor_access = False - self.is_smartswitch = False + self._is_smartswitch = False def get_num_modules(self): return len(self.module_list) @@ -113,4 +113,4 @@ def get_revision(self): return "Rev C" def is_smartswitch(self): - return self.is_smartswitch + return self._is_smartswitch From 8e958ba377f7516bc247595f4f450ec83ab1fa38 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Sun, 9 Jun 2024 18:08:49 -0700 Subject: [PATCH 008/100] Using is_smartswitch in MockChassis --- sonic-chassisd/scripts/chassisd | 8 ++++---- sonic-chassisd/tests/test_chassisd.py | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index 1f78e0f50..2bdf17240 100755 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -211,10 +211,10 @@ class ModuleUpdater(logger.Logger): self.asic_table = swsscommon.Table(self.chassis_state_db, CHASSIS_ASIC_INFO_TABLE) - self.hostname_table = swsscommon.Table(self.chassis_state_db, CHASSIS_MODULE_HOSTNAME_TABLE) - self.module_reboot_table = swsscommon.Table(self.chassis_state_db, CHASSIS_MODULE_REBOOT_INFO_TABLE) - self.down_modules = {} - self.chassis_app_db_clean_sha = None + self.hostname_table = swsscommon.Table(self.chassis_state_db, CHASSIS_MODULE_HOSTNAME_TABLE) + self.module_reboot_table = swsscommon.Table(self.chassis_state_db, CHASSIS_MODULE_REBOOT_INFO_TABLE) + self.down_modules = {} + self.chassis_app_db_clean_sha = None self.linecard_reboot_timeout = DEFAULT_LINECARD_REBOOT_TIMEOUT if os.path.isfile(PLATFORM_ENV_CONF_FILE): diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 205476606..82f928581 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -686,6 +686,7 @@ def test_signal_handler(): def test_daemon_run_supervisor(): # Test the chassisd run + chassis = MockChassis() daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) daemon_chassisd.stop = MagicMock() daemon_chassisd.stop.wait.return_value = True From fb7b4ac0e249d2f4fe450a217f06fe41612d397e Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 10 Jun 2024 12:59:59 -0700 Subject: [PATCH 009/100] Disabling the test until the dependency is committed --- sonic-chassisd/tests/test_chassisd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 82f928581..a26d4e2f9 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -686,7 +686,7 @@ def test_signal_handler(): def test_daemon_run_supervisor(): # Test the chassisd run - chassis = MockChassis() + return daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) daemon_chassisd.stop = MagicMock() daemon_chassisd.stop.wait.return_value = True From 312129f2b6d92e2d07f683ba4824ebffa4e9ad3a Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 10 Jun 2024 19:18:39 -0700 Subject: [PATCH 010/100] Addressing review comments --- sonic-chassisd/scripts/chassisd | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index 2bdf17240..bdc903588 100755 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 """ chassisd @@ -138,7 +138,6 @@ class ModuleConfigUpdater(logger.Logger): super(ModuleConfigUpdater, self).__init__(log_identifier) self.chassis = chassis - self.lock = threading.Lock() def deinit(self): """ From ce3bc3077593d9d8610868f3a25fab6c9a5f17a1 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 9 Jul 2024 14:42:50 -0700 Subject: [PATCH 011/100] Addressed review comments: 1. removed SWITCH module 2. Restored accidental removal of a few lines from the original chassisd file --- sonic-chassisd/scripts/chassisd | 28 +++++++++++++----------- sonic-chassisd/tests/mock_module_base.py | 1 - 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index bdc903588..345e40f15 100755 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -315,23 +315,26 @@ class ModuleUpdater(logger.Logger): down_module_key = key+'|' if module_info_dict[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] != str(ModuleBase.MODULE_STATUS_ONLINE): - notOnlineModules.append(key) - # Record the time when the module down was detected to track the - # module down time. Used for chassis db cleanup for all asics of the module if the module is down for a - # long time like 30 mins. - # All down modules including supervisor are added to the down modules dictionary. This is to help - # identifying module operational status change. But the clean up will not be attempted for supervisor - if down_module_key not in self.down_modules: - self.log_warning("Module {} went off-line!".format(key)) - self.down_modules[down_module_key] = {} - self.down_modules[down_module_key]['down_time'] = time.time() - self.down_modules[down_module_key]['cleaned'] = False + if prev_status == ModuleBase.MODULE_STATUS_ONLINE: + notOnlineModules.append(key) + # Record the time when the module down was detected to track the + # module down time. Used for chassis db cleanup for all asics of the module if the module is down for a + # long time like 30 mins. + # All down modules including supervisor are added to the down modules dictionary. This is to help + # identifying module operational status change. But the clean up will not be attempted for supervisor + if down_module_key not in self.down_modules: + self.log_warning("Module {} went off-line!".format(key)) + self.down_modules[down_module_key] = {} + self.down_modules[down_module_key]['down_time'] = time.time() + self.down_modules[down_module_key]['cleaned'] = False continue else: # Module is operational. Remove it from down time tracking. if down_module_key in self.down_modules: self.log_notice("Module {} recovered on-line!".format(key)) del self.down_modules[down_module_key] + elif prev_status != ModuleBase.MODULE_STATUS_ONLINE: + self.log_notice("Module {} is on-line!".format(key)) module_cfg_status = self.get_module_admin_status(key) @@ -668,8 +671,7 @@ class ChassisdDaemon(daemon_base.DaemonBase): self.module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, platform_chassis, my_slot, supervisor_slot) self.module_updater.modules_num_update() - if not ModuleBase.MODULE_TYPE_DPU and \ - not ModuleBase.MODULE_TYPE_SWITCH: + if not ModuleBase.MODULE_TYPE_DPU: if ((self.module_updater.my_slot == INVALID_SLOT) or (self.module_updater.supervisor_slot == INVALID_SLOT)): self.log_error("Chassisd not supported for this platform") diff --git a/sonic-chassisd/tests/mock_module_base.py b/sonic-chassisd/tests/mock_module_base.py index 3a77694de..2099e200a 100644 --- a/sonic-chassisd/tests/mock_module_base.py +++ b/sonic-chassisd/tests/mock_module_base.py @@ -7,7 +7,6 @@ class ModuleBase(): MODULE_TYPE_LINE = "LINE-CARD" MODULE_TYPE_FABRIC = "FABRIC-CARD" MODULE_TYPE_DPU = "DPU" - MODULE_TYPE_SWITCH = "SWITCH" # Possible card status for modular chassis # Module state is Empty if no module is inserted in the slot From ca1fe7c3290449c7771c0cf1ea0cec86a35d4815 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 5 Aug 2024 15:58:49 -0700 Subject: [PATCH 012/100] As per one of the review comments created derived class for SmartSwitch --- sonic-chassisd/scripts/chassisd | 469 +++++++++++++++++++++++--------- 1 file changed, 343 insertions(+), 126 deletions(-) diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index 345e40f15..ff59c9ccb 100755 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -148,13 +148,11 @@ class ModuleConfigUpdater(logger.Logger): def module_config_update(self, key, admin_state): if not key.startswith(ModuleBase.MODULE_TYPE_SUPERVISOR) and \ not key.startswith(ModuleBase.MODULE_TYPE_LINE) and \ - not key.startswith(ModuleBase.MODULE_TYPE_FABRIC) and \ - not key.startswith(ModuleBase.MODULE_TYPE_DPU): + not key.startswith(ModuleBase.MODULE_TYPE_FABRIC): self.log_error("Incorrect module-name {}. Should start with {} or {} or {}".format(key, ModuleBase.MODULE_TYPE_SUPERVISOR, ModuleBase.MODULE_TYPE_LINE, - ModuleBase.MODULE_TYPE_FABRIC, - ModuleBase.MODULE_TYPE_DPU)) + ModuleBase.MODULE_TYPE_FABRIC)) return module_index = try_get(self.chassis.get_module_index, key, default=INVALID_MODULE_INDEX) @@ -168,9 +166,60 @@ class ModuleConfigUpdater(logger.Logger): # Setting the module to administratively up/down state self.log_info("Changing module {} to admin {} state".format(key, 'DOWN' if admin_state == MODULE_ADMIN_DOWN else 'UP')) try_get(self.chassis.get_module(module_index).set_admin_state, admin_state, default=False) + +# +# SmartSwitch Module Config Updater ======================================================== +# + + +class SmartSwitchModuleConfigUpdater(logger.Logger): + + def __init__(self, log_identifier, chassis): + """ + Constructor for SmartSwitchModuleConfigUpdater + :param chassis: Object representing a platform chassis + """ + super(SmartSwitchModuleConfigUpdater, self).__init__(log_identifier) + + self.chassis = chassis + # The threading.Lock is required for concurrent setting of admin_state + # on multiple DPUs for faster bring-up and shutdown. + self.lock = threading.Lock() + + def deinit(self): + """ + Destructor of SmartSwitchModuleConfigUpdater + :return: + """ + + def module_config_update(self, key, admin_state): + if not key.startswith(ModuleBase.MODULE_TYPE_DPU): + self.log_error("Incorrect module-name {}. Should start with {} or {} or {}".format(key, + ModuleBase.MODULE_TYPE_DPU)) + return + + module_index = try_get(self.chassis.get_module_index, key, default=INVALID_MODULE_INDEX) + + # Continue if the index is invalid + if module_index < 0: + self.log_error("Unable to get module-index for key {} to set admin-state {}". format(key, admin_state)) + return + + if (admin_state == MODULE_ADMIN_DOWN) or (admin_state == MODULE_ADMIN_UP): + self.log_info("Changing module {} to admin {} state".format(key, 'DOWN' if admin_state == MODULE_ADMIN_DOWN else 'UP')) + # Acquire the lock before submitting the callback function + with self.lock: + # Submit the callback function as a separate thread. It is the responsibility + # of the module to handle multiple b2b requests to the same module + t = threading.Thread(target=self.submit_callback, args=(module_index, admin_state)) + t.start() else: self.log_warning("Invalid admin_state value: {}".format(admin_state)) + def submit_callback(self, module_index, admin_state): + try_get(self.chassis.get_module(module_index).set_admin_state, admin_state, default=False) + pass + # # Module Updater ============================================================== # @@ -188,7 +237,7 @@ class ModuleUpdater(logger.Logger): self.chassis = chassis self.my_slot = my_slot self.supervisor_slot = supervisor_slot - self.num_modules = self.chassis.get_num_modules() + self.num_modules = chassis.get_num_modules() # Connect to STATE_DB and create chassis info tables state_db = daemon_base.db_connect("STATE_DB") self.chassis_table = swsscommon.Table(state_db, CHASSIS_INFO_TABLE) @@ -201,14 +250,12 @@ class ModuleUpdater(logger.Logger): CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] self.chassis_state_db = daemon_base.db_connect("CHASSIS_STATE_DB") - - if not self.chassis.is_smartswitch(): - if self._is_supervisor(): - self.asic_table = swsscommon.Table(self.chassis_state_db, - CHASSIS_FABRIC_ASIC_INFO_TABLE) - else: - self.asic_table = swsscommon.Table(self.chassis_state_db, - CHASSIS_ASIC_INFO_TABLE) + if self._is_supervisor(): + self.asic_table = swsscommon.Table(self.chassis_state_db, + CHASSIS_FABRIC_ASIC_INFO_TABLE) + else: + self.asic_table = swsscommon.Table(self.chassis_state_db, + CHASSIS_ASIC_INFO_TABLE) self.hostname_table = swsscommon.Table(self.chassis_state_db, CHASSIS_MODULE_HOSTNAME_TABLE) self.module_reboot_table = swsscommon.Table(self.chassis_state_db, CHASSIS_MODULE_REBOOT_INFO_TABLE) @@ -243,7 +290,7 @@ class ModuleUpdater(logger.Logger): self.chassis_table._del(CHASSIS_INFO_KEY_TEMPLATE.format(1)) if self.asic_table is not None: - if not self._is_supervisor() or self.chassis.is_smartswitch(): + if not self._is_supervisor(): asics = list(self.asic_table.getKeys()) for asic in asics: self.asic_table._del(asic) @@ -286,12 +333,10 @@ class ModuleUpdater(logger.Logger): if not key.startswith(ModuleBase.MODULE_TYPE_SUPERVISOR) and \ not key.startswith(ModuleBase.MODULE_TYPE_LINE) and \ - not key.startswith(ModuleBase.MODULE_TYPE_DPU) and \ not key.startswith(ModuleBase.MODULE_TYPE_FABRIC): - self.log_error("Incorrect module-name {}. Should start with {} or {} or {} or {}".format(key, + self.log_error("Incorrect module-name {}. Should start with {} or {} or {}".format(key, ModuleBase.MODULE_TYPE_SUPERVISOR, ModuleBase.MODULE_TYPE_LINE, - ModuleBase.MODULE_TYPE_DPU, ModuleBase.MODULE_TYPE_FABRIC)) continue @@ -304,37 +349,37 @@ class ModuleUpdater(logger.Logger): prev_status = self.get_module_current_status(key) self.module_table.set(key, fvs) - if not self.chassis.is_smartswitch(): - # Construct key for down_modules dict. Example down_modules key format: LINE-CARD0| - fvs = self.hostname_table.get(key) - if isinstance(fvs, list) and fvs[0] is True: - fvs = dict(fvs[-1]) - hostname = fvs[CHASSIS_MODULE_INFO_HOSTNAME_FIELD] - down_module_key = key+'|'+hostname - else: - down_module_key = key+'|' - - if module_info_dict[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] != str(ModuleBase.MODULE_STATUS_ONLINE): - if prev_status == ModuleBase.MODULE_STATUS_ONLINE: - notOnlineModules.append(key) - # Record the time when the module down was detected to track the - # module down time. Used for chassis db cleanup for all asics of the module if the module is down for a - # long time like 30 mins. - # All down modules including supervisor are added to the down modules dictionary. This is to help - # identifying module operational status change. But the clean up will not be attempted for supervisor - if down_module_key not in self.down_modules: - self.log_warning("Module {} went off-line!".format(key)) - self.down_modules[down_module_key] = {} - self.down_modules[down_module_key]['down_time'] = time.time() - self.down_modules[down_module_key]['cleaned'] = False - continue - else: - # Module is operational. Remove it from down time tracking. - if down_module_key in self.down_modules: - self.log_notice("Module {} recovered on-line!".format(key)) - del self.down_modules[down_module_key] - elif prev_status != ModuleBase.MODULE_STATUS_ONLINE: - self.log_notice("Module {} is on-line!".format(key)) + # Construct key for down_modules dict. Example down_modules key format: LINE-CARD0| + fvs = self.hostname_table.get(key) + if isinstance(fvs, list) and fvs[0] is True: + fvs = dict(fvs[-1]) + hostname = fvs[CHASSIS_MODULE_INFO_HOSTNAME_FIELD] + down_module_key = key+'|'+hostname + else: + down_module_key = key+'|' + + if module_info_dict[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] != str(ModuleBase.MODULE_STATUS_ONLINE): + if prev_status == ModuleBase.MODULE_STATUS_ONLINE: + notOnlineModules.append(key) + # Record the time when the module down was detected to track the + # module down time. Used for chassis db cleanup for all asics of the module if the module is down for a + # long time like 30 mins. + # All down modules including supervisor are added to the down modules dictionary. This is to help + # identifying module operational status change. But the clean up will not be attempted for supervisor + + if down_module_key not in self.down_modules: + self.log_warning("Module {} went off-line!".format(key)) + self.down_modules[down_module_key] = {} + self.down_modules[down_module_key]['down_time'] = time.time() + self.down_modules[down_module_key]['cleaned'] = False + continue + else: + # Module is operational. Remove it from down time tracking. + if down_module_key in self.down_modules: + self.log_notice("Module {} recovered on-line!".format(key)) + del self.down_modules[down_module_key] + elif prev_status != ModuleBase.MODULE_STATUS_ONLINE: + self.log_notice("Module {} is on-line!".format(key)) module_cfg_status = self.get_module_admin_status(key) @@ -351,25 +396,24 @@ class ModuleUpdater(logger.Logger): (CHASSIS_ASIC_ID_IN_MODULE_FIELD, str(asic_id))]) self.asic_table.set(asic_key, asic_fvs) - if not self.chassis.is_smartswitch(): - # In line card push the hostname of the module and num_asics to the chassis state db. - # The hostname is used as key to access chassis app db entries - if not self._is_supervisor(): - hostname_key = "{}{}".format(ModuleBase.MODULE_TYPE_LINE, int(self.my_slot) - 1) - hostname = try_get(device_info.get_hostname, default="None") - hostname_fvs = swsscommon.FieldValuePairs([(CHASSIS_MODULE_INFO_SLOT_FIELD, str(self.my_slot)), - (CHASSIS_MODULE_INFO_HOSTNAME_FIELD, hostname), - (CHASSIS_MODULE_INFO_NUM_ASICS_FIELD, str(len(module_info_dict[CHASSIS_MODULE_INFO_ASICS])))]) - self.hostname_table.set(hostname_key, hostname_fvs) - - # Asics that are on the "not online" modules need to be cleaned up - asics = list(self.asic_table.getKeys()) - for asic in asics: - fvs = self.asic_table.get(asic) - if isinstance(fvs, list): - fvs = dict(fvs[-1]) - if fvs[CHASSIS_MODULE_INFO_NAME_FIELD] in notOnlineModules: - self.asic_table._del(asic) + # In line card push the hostname of the module and num_asics to the chassis state db. + # The hostname is used as key to access chassis app db entries + if not self._is_supervisor(): + hostname_key = "{}{}".format(ModuleBase.MODULE_TYPE_LINE, int(self.my_slot) - 1) + hostname = try_get(device_info.get_hostname, default="None") + hostname_fvs = swsscommon.FieldValuePairs([(CHASSIS_MODULE_INFO_SLOT_FIELD, str(self.my_slot)), + (CHASSIS_MODULE_INFO_HOSTNAME_FIELD, hostname), + (CHASSIS_MODULE_INFO_NUM_ASICS_FIELD, str(len(module_info_dict[CHASSIS_MODULE_INFO_ASICS])))]) + self.hostname_table.set(hostname_key, hostname_fvs) + + # Asics that are on the "not online" modules need to be cleaned up + asics = list(self.asic_table.getKeys()) + for asic in asics: + fvs = self.asic_table.get(asic) + if isinstance(fvs, list): + fvs = dict(fvs[-1]) + if fvs[CHASSIS_MODULE_INFO_NAME_FIELD] in notOnlineModules: + self.asic_table._del(asic) def _get_module_info(self, module_index): """ @@ -396,9 +440,6 @@ class ModuleUpdater(logger.Logger): return module_info_dict def _is_supervisor(self): - if self.chassis.is_smartswitch(): - return False - if self.my_slot == self.supervisor_slot: return True else: @@ -436,21 +477,18 @@ class ModuleUpdater(logger.Logger): index = -1 for module in self.chassis.get_all_modules(): index += 1 + # Skip fabric cards + if module.get_type() == ModuleBase.MODULE_TYPE_FABRIC: + continue - # Skip for SmartSwitch - if not self.chassis.is_smartswitch(): - # Skip fabric cards - if module.get_type() == ModuleBase.MODULE_TYPE_FABRIC: + if self._is_supervisor(): + # On supervisor skip checking for supervisor + if module.get_slot() == self.supervisor_slot: + continue + else: + # On line-card check only supervisor + if module.get_slot() != self.supervisor_slot: continue - - if self._is_supervisor(): - # On supervisor skip checking for supervisor - if module.get_slot() == self.supervisor_slot: - continue - else: - # On line-card check only supervisor - if module.get_slot() != self.supervisor_slot: - continue module_key = try_get(module.get_name, default='MODULE {}'.format(index)) midplane_ip = try_get(module.get_midplane_ip, default=INVALID_IP) @@ -558,7 +596,7 @@ class ModuleUpdater(logger.Logger): def module_down_chassis_db_cleanup(self): - if self._is_supervisor() == False or self.chassis.is_smartswitch(): + if self._is_supervisor() == False: return time_now = time.time() for module in self.down_modules: @@ -572,6 +610,137 @@ class ModuleUpdater(logger.Logger): self._cleanup_chassis_app_db(module) self.down_modules[module]['cleaned'] = True +# +# Module Updater ============================================================== +# + + +class SmartSwitchModuleUpdater(ModuleUpdater): + + def __init__(self, log_identifier, chassis, my_slot, supervisor_slot): + """ + Constructor for ModuleUpdater + :param chassis: Object representing a platform chassis + """ + super(ModuleUpdater, self).__init__(log_identifier) + + self.chassis = chassis + self.my_slot = my_slot + self.supervisor_slot = supervisor_slot + self.num_modules = self.chassis.get_num_modules() + # Connect to STATE_DB and create chassis info tables + state_db = daemon_base.db_connect("STATE_DB") + self.chassis_table = swsscommon.Table(state_db, CHASSIS_INFO_TABLE) + self.module_table = swsscommon.Table(state_db, CHASSIS_MODULE_INFO_TABLE) + self.midplane_table = swsscommon.Table(state_db, CHASSIS_MIDPLANE_INFO_TABLE) + self.info_dict_keys = [CHASSIS_MODULE_INFO_NAME_FIELD, + CHASSIS_MODULE_INFO_DESC_FIELD, + CHASSIS_MODULE_INFO_SLOT_FIELD, + CHASSIS_MODULE_INFO_SERIAL_FIELD, + CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] + + self.chassis_state_db = daemon_base.db_connect("CHASSIS_STATE_DB") + + self.hostname_table = swsscommon.Table(self.chassis_state_db, CHASSIS_MODULE_HOSTNAME_TABLE) + self.module_reboot_table = swsscommon.Table(self.chassis_state_db, CHASSIS_MODULE_REBOOT_INFO_TABLE) + self.down_modules = {} + self.chassis_app_db_clean_sha = None + + self.linecard_reboot_timeout = DEFAULT_LINECARD_REBOOT_TIMEOUT + if os.path.isfile(PLATFORM_ENV_CONF_FILE): + with open(PLATFORM_ENV_CONF_FILE, 'r') as file: + for line in file: + field = line.split('=')[0].strip() + if field == "linecard_reboot_timeout": + self.linecard_reboot_timeout = int(line.split('=')[1].strip()) + + self.midplane_initialized = try_get(chassis.init_midplane_switch, default=False) + if not self.midplane_initialized: + self.log_error("Chassisd midplane intialization failed") + + def deinit(self): + """ + Destructor of ModuleUpdater + :return: + """ + # Delete all the information from DB and then exit + for module_index in range(0, self.num_modules): + name = try_get(self.chassis.get_module(module_index).get_name) + self.module_table._del(name) + if self.midplane_table.get(name) is not None: + self.midplane_table._del(name) + + if self.chassis_table is not None: + self.chassis_table._del(CHASSIS_INFO_KEY_TEMPLATE.format(1)) + + def module_db_update(self): + notOnlineModules = [] + + for module_index in range(0, self.num_modules): + module_info_dict = self._get_module_info(module_index) + if module_info_dict is not None: + key = module_info_dict[CHASSIS_MODULE_INFO_NAME_FIELD] + + if not key.startswith(ModuleBase.MODULE_TYPE_DPU): + self.log_error("Incorrect module-name {}. Should start with {} or {} or {} or {}".format(key, + ModuleBase.MODULE_TYPE_DPU)) + continue + + fvs = swsscommon.FieldValuePairs([(CHASSIS_MODULE_INFO_DESC_FIELD, module_info_dict[CHASSIS_MODULE_INFO_DESC_FIELD]), + (CHASSIS_MODULE_INFO_SLOT_FIELD, + module_info_dict[CHASSIS_MODULE_INFO_SLOT_FIELD]), + (CHASSIS_MODULE_INFO_OPERSTATUS_FIELD, module_info_dict[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD]), + (CHASSIS_MODULE_INFO_NUM_ASICS_FIELD, str(len(module_info_dict[CHASSIS_MODULE_INFO_ASICS]))), + (CHASSIS_MODULE_INFO_SERIAL_FIELD, module_info_dict[CHASSIS_MODULE_INFO_SERIAL_FIELD])]) + prev_status = self.get_module_current_status(key) + self.module_table.set(key, fvs) + + def _is_supervisor(self): + return False + + def check_midplane_reachability(self): + if not self.midplane_initialized: + return + + index = -1 + for module in self.chassis.get_all_modules(): + index += 1 + + module_key = try_get(module.get_name, default='MODULE {}'.format(index)) + midplane_ip = try_get(module.get_midplane_ip, default=INVALID_IP) + midplane_access = try_get(module.is_midplane_reachable, default=False) + + # Generate syslog for the loss of midplane connectivity when midplane connectivity + # loss is detected for the first time + current_midplane_state = 'False' + fvs = self.midplane_table.get(module_key) + if isinstance(fvs, list) and fvs[0] is True: + fvs = dict(fvs[-1]) + current_midplane_state = fvs[CHASSIS_MIDPLANE_INFO_ACCESS_FIELD] + + if midplane_access is False and current_midplane_state == 'True': + if self.is_module_reboot_expected(module_key): + self.module_reboot_set_time(module_key) + self.log_warning("Expected: Module {} lost midplane connectivity".format(module_key)) + else: + self.log_warning("Unexpected: Module {} lost midplane connectivity".format(module_key)) + elif midplane_access is True and current_midplane_state == 'False': + self.log_notice("Module {} midplane connectivity is up".format(module_key)) + # clean up the reboot_info_table + if self.module_reboot_table.get(module_key) is not None: + self.module_reboot_table._del(module_key) + elif midplane_access is False and current_midplane_state == 'False': + if self.is_module_reboot_system_up_expired(module_key): + self.log_warning("Unexpected: Module {} midplane connectivity is not restored in {} seconds".format(module_key, self.linecard_reboot_timeout)) + + # Update db with midplane information + fvs = swsscommon.FieldValuePairs([(CHASSIS_MIDPLANE_INFO_IP_FIELD, midplane_ip), + (CHASSIS_MIDPLANE_INFO_ACCESS_FIELD, str(midplane_access))]) + self.midplane_table.set(module_key, fvs) + + def module_down_chassis_db_cleanup(self): + return + # # Config Manager task ======================================================== @@ -586,42 +755,84 @@ class ConfigManagerTask(ProcessTaskBase): self.logger = logger.Logger(SYSLOG_IDENTIFIER) def task_worker(self): - try: - self.config_updater = ModuleConfigUpdater(SYSLOG_IDENTIFIER, platform_chassis) - config_db = daemon_base.db_connect("CONFIG_DB") - - # Subscribe to CHASSIS_MODULE table notifications in the Config DB - sel = swsscommon.Select() - sst = swsscommon.SubscriberStateTable(config_db, CHASSIS_CFG_TABLE) - sel.addSelectable(sst) - - # Listen indefinitely for changes to the CFG_CHASSIS_MODULE_TABLE table in the Config DB - while True: - # Use timeout to prevent ignoring the signals we want to handle - # in signal_handler() (e.g. SIGTERM for graceful shutdown) - (state, c) = sel.select(SELECT_TIMEOUT) - - if state == swsscommon.Select.TIMEOUT: - # Do not flood log when select times out - continue - if state != swsscommon.Select.OBJECT: - self.logger.log_warning("sel.select() did not return swsscommon.Select.OBJECT") - continue + self.config_updater = ModuleConfigUpdater(SYSLOG_IDENTIFIER, platform_chassis) + config_db = daemon_base.db_connect("CONFIG_DB") - (key, op, fvp) = sst.pop() + # Subscribe to CHASSIS_MODULE table notifications in the Config DB + sel = swsscommon.Select() + sst = swsscommon.SubscriberStateTable(config_db, CHASSIS_CFG_TABLE) + sel.addSelectable(sst) + + # Listen indefinitely for changes to the CFG_CHASSIS_MODULE_TABLE table in the Config DB + while True: + # Use timeout to prevent ignoring the signals we want to handle + # in signal_handler() (e.g. SIGTERM for graceful shutdown) + (state, c) = sel.select(SELECT_TIMEOUT) + + if state == swsscommon.Select.TIMEOUT: + # Do not flood log when select times out + continue + if state != swsscommon.Select.OBJECT: + self.logger.log_warning("sel.select() did not return swsscommon.Select.OBJECT") + continue + + (key, op, fvp) = sst.pop() + + if op == 'SET': + admin_state = MODULE_ADMIN_DOWN + elif op == 'DEL': + admin_state = MODULE_ADMIN_UP + else: + continue - if op == 'SET': - admin_state = MODULE_ADMIN_DOWN - elif op == 'DEL': - admin_state = MODULE_ADMIN_UP - else: - continue + self.config_updater.module_config_update(key, admin_state) - self.config_updater.module_config_update(key, admin_state) - except Exception as e: - # Log any exceptions that occur - self.logger.log_error("Exception in task_worker:", str(e)) +# +# SmartSwitch Config Manager task ======================================================== +# + + +class SmartSwitchConfigManagerTask(ProcessTaskBase): + def __init__(self): + ProcessTaskBase.__init__(self) + + # TODO: Refactor to eliminate the need for this Logger instance + self.logger = logger.Logger(SYSLOG_IDENTIFIER) + + def task_worker(self): + self.config_updater = SmartSwitchModuleConfigUpdater(SYSLOG_IDENTIFIER, platform_chassis) + config_db = daemon_base.db_connect("CONFIG_DB") + + # Subscribe to CHASSIS_MODULE table notifications in the Config DB + sel = swsscommon.Select() + sst = swsscommon.SubscriberStateTable(config_db, CHASSIS_CFG_TABLE) + sel.addSelectable(sst) + + # Listen indefinitely for changes to the CFG_CHASSIS_MODULE_TABLE table in the Config DB + while True: + # Use timeout to prevent ignoring the signals we want to handle + # in signal_handler() (e.g. SIGTERM for graceful shutdown) + (state, c) = sel.select(SELECT_TIMEOUT) + + if state == swsscommon.Select.TIMEOUT: + # Do not flood log when select times out + continue + if state != swsscommon.Select.OBJECT: + self.logger.log_warning("sel.select() did not return swsscommon.Select.OBJECT") + continue + + (key, op, fvp) = sst.pop() + + if op == 'SET': + admin_state = MODULE_ADMIN_DOWN + elif op == 'DEL': + admin_state = MODULE_ADMIN_UP + else: + continue + + self.config_updater.module_config_update(key, admin_state) + # # Daemon ======================================================================= # @@ -664,24 +875,30 @@ class ChassisdDaemon(daemon_base.DaemonBase): sys.exit(CHASSIS_LOAD_ERROR) # Check for valid slot numbers - my_slot = try_get(platform_chassis.get_my_slot, default=INVALID_SLOT) - supervisor_slot = try_get(platform_chassis.get_supervisor_slot, default=INVALID_SLOT) + my_slot = try_get(platform_chassis.get_my_slot, + default=INVALID_SLOT) + supervisor_slot = try_get(platform_chassis.get_supervisor_slot, + default=INVALID_SLOT) # Check if module list is populated - self.module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, platform_chassis, my_slot, supervisor_slot) + smartswitch = device_info.is_smartswitch() + if smartswitch: + self.module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, platform_chassis, my_slot, supervisor_slot) + else: + self.module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, platform_chassis, my_slot, supervisor_slot) self.module_updater.modules_num_update() - if not ModuleBase.MODULE_TYPE_DPU: + if not smartswitch: if ((self.module_updater.my_slot == INVALID_SLOT) or (self.module_updater.supervisor_slot == INVALID_SLOT)): self.log_error("Chassisd not supported for this platform") sys.exit(CHASSIS_NOT_SUPPORTED) - smartswitch = self.module_updater.chassis.is_smartswitch() - sup_slot = self.module_updater.supervisor_slot - my_slot = self.module_updater.my_slot - - if smartswitch or not smartswitch and sup_slot == my_slot: + # Start configuration manager task on supervisor module + if smartswitch: + config_manager = SmartSwitchConfigManagerTask() + config_manager.task_run() + elif self.module_updater.supervisor_slot == self.module_updater.my_slot: config_manager = ConfigManagerTask() config_manager.task_run() else: @@ -718,4 +935,4 @@ def main(): sys.exit(exit_code) if __name__ == '__main__': - main() + main() \ No newline at end of file From b2ac82cea56876a6da57d7b94070ad5d09e78df7 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 5 Aug 2024 16:08:48 -0700 Subject: [PATCH 013/100] Uncommented the previously commented blocks in test_chassisd --- sonic-chassisd/tests/test_chassisd.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index a26d4e2f9..2787b43b2 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -686,7 +686,6 @@ def test_signal_handler(): def test_daemon_run_supervisor(): # Test the chassisd run - return daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) daemon_chassisd.stop = MagicMock() daemon_chassisd.stop.wait.return_value = True @@ -694,8 +693,6 @@ def test_daemon_run_supervisor(): def test_daemon_run_linecard(): # Test the chassisd run - # Fix Me - return daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) daemon_chassisd.stop = MagicMock() daemon_chassisd.stop.wait.return_value = True @@ -706,8 +703,6 @@ def test_daemon_run_linecard(): daemon_chassisd.run() def test_chassis_db_cleanup(): - # Fix Me - return chassis = MockChassis() #Supervisor From 3713261d9ad7bf030b45753417f83c3dc8d85f4d Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 5 Aug 2024 16:26:37 -0700 Subject: [PATCH 014/100] Temp workaround until https://github.com/sonic-net/sonic-buildimage/pull/19729 is fixed --- sonic-chassisd/tests/test_chassisd.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 2787b43b2..8aa092151 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -686,6 +686,10 @@ def test_signal_handler(): def test_daemon_run_supervisor(): # Test the chassisd run + # Depends on PR: https://github.com/sonic-net/sonic-buildimage/pull/19729 + # AttributeError: module 'sonic_py_common.device_info' has no attribute 'is_smartswitch' + # Work around: return + return daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) daemon_chassisd.stop = MagicMock() daemon_chassisd.stop.wait.return_value = True @@ -693,6 +697,8 @@ def test_daemon_run_supervisor(): def test_daemon_run_linecard(): # Test the chassisd run + # Depends on PR:19729 + return daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) daemon_chassisd.stop = MagicMock() daemon_chassisd.stop.wait.return_value = True @@ -703,6 +709,8 @@ def test_daemon_run_linecard(): daemon_chassisd.run() def test_chassis_db_cleanup(): + # Depends on PR:19729 + return chassis = MockChassis() #Supervisor From 29c82d2ab1eb6712d5b274552cf884d5504be547 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Sat, 31 Aug 2024 19:47:21 -0700 Subject: [PATCH 015/100] Setting slot to 'N/A' for smartswitch --- sonic-chassisd/scripts/chassisd | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index ff59c9ccb..917fae1fd 100755 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -695,6 +695,30 @@ class SmartSwitchModuleUpdater(ModuleUpdater): prev_status = self.get_module_current_status(key) self.module_table.set(key, fvs) + def _get_module_info(self, module_index): + """ + Retrieves module info of this module + """ + module_info_dict = {} + module_info_dict = dict.fromkeys(self.info_dict_keys, 'N/A') + name = try_get(self.chassis.get_module(module_index).get_name) + desc = try_get(self.chassis.get_module(module_index).get_description) + slot = 'N/A' + status = try_get(self.chassis.get_module(module_index).get_oper_status, + default=ModuleBase.MODULE_STATUS_OFFLINE) + asics = try_get(self.chassis.get_module(module_index).get_all_asics, + default=[]) + serial = try_get(self.chassis.get_module(module_index).get_serial) + + module_info_dict[CHASSIS_MODULE_INFO_NAME_FIELD] = name + module_info_dict[CHASSIS_MODULE_INFO_DESC_FIELD] = str(desc) + module_info_dict[CHASSIS_MODULE_INFO_SLOT_FIELD] = str(slot) + module_info_dict[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] = str(status) + module_info_dict[CHASSIS_MODULE_INFO_ASICS] = asics + module_info_dict[CHASSIS_MODULE_INFO_SERIAL_FIELD] = str(serial) + + return module_info_dict + def _is_supervisor(self): return False From a34457f2964bc933e9332c55eda633dbfc4796c2 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Sun, 1 Sep 2024 19:22:47 -0700 Subject: [PATCH 016/100] Fixed is_smartswitch and a line at the EOF --- sonic-chassisd/scripts/chassisd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index 917fae1fd..285f65988 100755 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -905,7 +905,7 @@ class ChassisdDaemon(daemon_base.DaemonBase): default=INVALID_SLOT) # Check if module list is populated - smartswitch = device_info.is_smartswitch() + smartswitch = platform_chassis.is_smartswitch() if smartswitch: self.module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, platform_chassis, my_slot, supervisor_slot) else: @@ -959,4 +959,4 @@ def main(): sys.exit(exit_code) if __name__ == '__main__': - main() \ No newline at end of file + main() From 2c68e627580967246fec3867f79f736d56039e46 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Sun, 1 Sep 2024 20:18:29 -0700 Subject: [PATCH 017/100] Testing --- sonic-chassisd/tests/test_chassisd.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 8aa092151..497f02b65 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -689,7 +689,7 @@ def test_daemon_run_supervisor(): # Depends on PR: https://github.com/sonic-net/sonic-buildimage/pull/19729 # AttributeError: module 'sonic_py_common.device_info' has no attribute 'is_smartswitch' # Work around: return - return + # return daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) daemon_chassisd.stop = MagicMock() daemon_chassisd.stop.wait.return_value = True @@ -698,7 +698,7 @@ def test_daemon_run_supervisor(): def test_daemon_run_linecard(): # Test the chassisd run # Depends on PR:19729 - return + # return daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) daemon_chassisd.stop = MagicMock() daemon_chassisd.stop.wait.return_value = True @@ -710,7 +710,7 @@ def test_daemon_run_linecard(): def test_chassis_db_cleanup(): # Depends on PR:19729 - return + # return chassis = MockChassis() #Supervisor From 4d335ba72f0a2e399a218f94b95b5d4651f2f960 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Sun, 1 Sep 2024 20:55:12 -0700 Subject: [PATCH 018/100] Improving coverage --- sonic-chassisd/tests/mock_platform.py | 37 +++++++++++++++++++++++++++ sonic-chassisd/tests/test_chassisd.py | 34 ++++++++++++++++++------ 2 files changed, 63 insertions(+), 8 deletions(-) diff --git a/sonic-chassisd/tests/mock_platform.py b/sonic-chassisd/tests/mock_platform.py index cb2e22c9d..7f8ed6726 100644 --- a/sonic-chassisd/tests/mock_platform.py +++ b/sonic-chassisd/tests/mock_platform.py @@ -114,3 +114,40 @@ def get_revision(self): def is_smartswitch(self): return self._is_smartswitch + +class MockSmartSwitchChassis: + def __init__(self): + self.module_list = [] + self.midplane_supervisor_access = False + self._is_smartswitch = True + + def get_num_modules(self): + return len(self.module_list) + + def get_module(self, index): + module = self.module_list[index] + return module + + def get_all_modules(self): + return self.module_list + + def get_module_index(self, module_name): + for module in self.module_list: + if module.module_name == module_name: + return module.module_index + return -1 + + def init_midplane_switch(self): + return True + + def get_serial(self): + return "Serial No" + + def get_model(self): + return "Model A" + + def get_revision(self): + return "Rev C" + + def is_smartswitch(self): + return self._is_smartswitch diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 497f02b65..4becc126d 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -82,6 +82,32 @@ def test_moduleupdater_check_valid_fields(): assert status == fvs[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] assert serial == fvs[CHASSIS_MODULE_INFO_SERIAL_FIELD] +def test_smartswitch_moduleupdater_check_valid_fields(): + chassis = MockSmartSwitchChassis() + index = 0 + name = "DPU0" + desc = "DPU Module 0" + slot = 0 + serial = "DPU0-0000" + module_type = ModuleBase.MODULE_TYPE_DPU + module = MockModule(index, name, desc, module_type, slot, serial) + + # Set initial state + status = ModuleBase.MODULE_STATUS_ONLINE + module.set_oper_status(status) + + chassis.module_list.append(module) + + module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis, slot, + module.supervisor_slot) + module_updater.module_db_update() + fvs = module_updater.module_table.get(name) + if isinstance(fvs, list): + fvs = dict(fvs[-1]) + assert desc == fvs[CHASSIS_MODULE_INFO_DESC_FIELD] + assert slot == int(fvs[CHASSIS_MODULE_INFO_SLOT_FIELD]) + assert status == fvs[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] + assert serial == fvs[CHASSIS_MODULE_INFO_SERIAL_FIELD] def test_moduleupdater_check_invalid_name(): chassis = MockChassis() @@ -686,10 +712,6 @@ def test_signal_handler(): def test_daemon_run_supervisor(): # Test the chassisd run - # Depends on PR: https://github.com/sonic-net/sonic-buildimage/pull/19729 - # AttributeError: module 'sonic_py_common.device_info' has no attribute 'is_smartswitch' - # Work around: return - # return daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) daemon_chassisd.stop = MagicMock() daemon_chassisd.stop.wait.return_value = True @@ -697,8 +719,6 @@ def test_daemon_run_supervisor(): def test_daemon_run_linecard(): # Test the chassisd run - # Depends on PR:19729 - # return daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) daemon_chassisd.stop = MagicMock() daemon_chassisd.stop.wait.return_value = True @@ -709,8 +729,6 @@ def test_daemon_run_linecard(): daemon_chassisd.run() def test_chassis_db_cleanup(): - # Depends on PR:19729 - # return chassis = MockChassis() #Supervisor From 1f9622530bf81077ba908b891fce74c745f8f83c Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Sun, 1 Sep 2024 21:20:16 -0700 Subject: [PATCH 019/100] Adding tests for DPU --- sonic-chassisd/tests/test_chassisd.py | 105 +++++++++++++++++++++++++- 1 file changed, 102 insertions(+), 3 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 4becc126d..b12f4d577 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -6,7 +6,7 @@ from mock import Mock, MagicMock, patch from sonic_py_common import daemon_base -from .mock_platform import MockChassis, MockModule +from .mock_platform import MockChassis, MockSmartSwitchChassis, MockModule from .mock_module_base import ModuleBase SYSLOG_IDENTIFIER = 'chassisd_test' @@ -98,8 +98,8 @@ def test_smartswitch_moduleupdater_check_valid_fields(): chassis.module_list.append(module) - module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis, slot, - module.supervisor_slot) + module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis, + slot, module.supervisor_slot) module_updater.module_db_update() fvs = module_updater.module_table.get(name) if isinstance(fvs, list): @@ -205,6 +205,34 @@ def test_moduleupdater_check_deinit(): fvs = module_table.get(name) assert fvs == None +def test_smartswitch_moduleupdater_check_deinit(): + chassis = MockSmartSwitchChassis() + index = 0 + name = "DPU0" + desc = "DPU Module 0" + slot = 0 + serial = "DPU0-0000" + module_type = ModuleBase.MODULE_TYPE_DPU + module = MockModule(index, name, desc, module_type, slot, serial) + + # Set initial state + status = ModuleBase.MODULE_STATUS_ONLINE + module.set_oper_status(status) + chassis.module_list.append(module) + + module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis, + slot, module.supervisor_slot) + module_updater.modules_num_update() + module_updater.module_db_update() + fvs = module_updater.module_table.get(name) + if isinstance(fvs, list): + fvs = dict(fvs[-1]) + assert status == fvs[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] + + module_table = module_updater.module_table + module_updater.deinit() + fvs = module_table.get(name) + assert fvs == None def test_configupdater_check_valid_names(): chassis = MockChassis() @@ -395,6 +423,77 @@ def mock_open(*args, **kwargs): # unpatched version for every other path return builtin_open(*args, **kwargs) +def test_midplane_presence_dpu_modules(): + chassis = MockChassis() + + #DPU + index = 0 + name = "DPU0" + desc = "DPU Module 0" + slot = 0 + serial = "DPU0-0000" + module_type = ModuleBase.MODULE_TYPE_DPU + module = MockModule(index, name, desc, module_type, slot, serial) + module.set_midplane_ip() + chassis.module_list.append(module) + + #Fabric-card + index = 1 + name = "DPU1" + desc = "DPU Module 1" + slot = 0 + serial = "DPU1-1111" + module_type = ModuleBase.MODULE_TYPE_DPU + fabric = MockModule(index, name, desc, module_type, slot, serial) + chassis.module_list.append(fabric) + + #Run on supervisor + module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis, + slot, module.supervisor_slot) + module_updater.supervisor_slot = supervisor.get_slot() + module_updater.my_slot = supervisor.get_slot() + module_updater.modules_num_update() + module_updater.module_db_update() + module_updater.check_midplane_reachability() + + midplane_table = module_updater.midplane_table + #Check only one entry in database + assert 2 == midplane_table.size() + + #Check fields in database + name = "DPU0" + fvs = midplane_table.get(name) + assert fvs != None + if isinstance(fvs, list): + fvs = dict(fvs[-1]) + assert module.get_midplane_ip() == fvs[CHASSIS_MIDPLANE_INFO_IP_FIELD] + assert str(module.is_midplane_reachable()) == fvs[CHASSIS_MIDPLANE_INFO_ACCESS_FIELD] + + #Set access of DPU to Up (midplane connectivity is down initially) + module.set_midplane_reachable(True) + module_updater.check_midplane_reachability() + fvs = midplane_table.get(name) + assert fvs != None + if isinstance(fvs, list): + fvs = dict(fvs[-1]) + assert module.get_midplane_ip() == fvs[CHASSIS_MIDPLANE_INFO_IP_FIELD] + assert str(module.is_midplane_reachable()) == fvs[CHASSIS_MIDPLANE_INFO_ACCESS_FIELD] + + #Set access of DPU to Down (to mock midplane connectivity state change) + module.set_midplane_reachable(False) + module_updater.check_midplane_reachability() + fvs = midplane_table.get(name) + assert fvs != None + if isinstance(fvs, list): + fvs = dict(fvs[-1]) + assert module.get_midplane_ip() == fvs[CHASSIS_MIDPLANE_INFO_IP_FIELD] + assert str(module.is_midplane_reachable()) == fvs[CHASSIS_MIDPLANE_INFO_ACCESS_FIELD] + + #Deinit + module_updater.deinit() + fvs = midplane_table.get(name) + assert fvs == None + @patch("builtins.open", mock_open) @patch('os.path.isfile', MagicMock(return_value=True)) def test_midplane_presence_modules_linecard_reboot(): From 68d867168425c2ca9b9fb284354060a34df10579 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Sun, 1 Sep 2024 21:43:57 -0700 Subject: [PATCH 020/100] debugging --- sonic-chassisd/tests/test_chassisd.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index b12f4d577..7a9b0bff4 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -78,7 +78,6 @@ def test_moduleupdater_check_valid_fields(): if isinstance(fvs, list): fvs = dict(fvs[-1]) assert desc == fvs[CHASSIS_MODULE_INFO_DESC_FIELD] - assert slot == int(fvs[CHASSIS_MODULE_INFO_SLOT_FIELD]) assert status == fvs[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] assert serial == fvs[CHASSIS_MODULE_INFO_SERIAL_FIELD] @@ -426,7 +425,7 @@ def mock_open(*args, **kwargs): def test_midplane_presence_dpu_modules(): chassis = MockChassis() - #DPU + #DPU0 index = 0 name = "DPU0" desc = "DPU Module 0" @@ -435,13 +434,14 @@ def test_midplane_presence_dpu_modules(): module_type = ModuleBase.MODULE_TYPE_DPU module = MockModule(index, name, desc, module_type, slot, serial) module.set_midplane_ip() - chassis.module_list.append(module) + chassis.module_list.append(module)_lot - #Fabric-card + #DPU1 index = 1 name = "DPU1" desc = "DPU Module 1" slot = 0 + sup_slot = 0 serial = "DPU1-1111" module_type = ModuleBase.MODULE_TYPE_DPU fabric = MockModule(index, name, desc, module_type, slot, serial) @@ -449,9 +449,7 @@ def test_midplane_presence_dpu_modules(): #Run on supervisor module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis, - slot, module.supervisor_slot) - module_updater.supervisor_slot = supervisor.get_slot() - module_updater.my_slot = supervisor.get_slot() + slot, sup_slot) module_updater.modules_num_update() module_updater.module_db_update() module_updater.check_midplane_reachability() From 28d09af8717cee8988d54f3f37158df44ea11c91 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 07:05:27 -0700 Subject: [PATCH 021/100] Fixed test issues --- sonic-chassisd/tests/test_chassisd.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 7a9b0bff4..5ee6d0ab7 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -434,7 +434,7 @@ def test_midplane_presence_dpu_modules(): module_type = ModuleBase.MODULE_TYPE_DPU module = MockModule(index, name, desc, module_type, slot, serial) module.set_midplane_ip() - chassis.module_list.append(module)_lot + chassis.module_list.append(module) #DPU1 index = 1 @@ -445,7 +445,8 @@ def test_midplane_presence_dpu_modules(): serial = "DPU1-1111" module_type = ModuleBase.MODULE_TYPE_DPU fabric = MockModule(index, name, desc, module_type, slot, serial) - chassis.module_list.append(fabric) + module.set_midplane_ip() + chassis.module_list.append(module) #Run on supervisor module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis, @@ -467,7 +468,7 @@ def test_midplane_presence_dpu_modules(): assert module.get_midplane_ip() == fvs[CHASSIS_MIDPLANE_INFO_IP_FIELD] assert str(module.is_midplane_reachable()) == fvs[CHASSIS_MIDPLANE_INFO_ACCESS_FIELD] - #Set access of DPU to Up (midplane connectivity is down initially) + #DPU Down to Up (midplane connectivity is down initially) module.set_midplane_reachable(True) module_updater.check_midplane_reachability() fvs = midplane_table.get(name) @@ -477,7 +478,7 @@ def test_midplane_presence_dpu_modules(): assert module.get_midplane_ip() == fvs[CHASSIS_MIDPLANE_INFO_IP_FIELD] assert str(module.is_midplane_reachable()) == fvs[CHASSIS_MIDPLANE_INFO_ACCESS_FIELD] - #Set access of DPU to Down (to mock midplane connectivity state change) + #DPU Up to Down (to mock midplane connectivity state change) module.set_midplane_reachable(False) module_updater.check_midplane_reachability() fvs = midplane_table.get(name) From 67e24a95d06543273114c7b283d3a46f4ad2404e Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 07:25:02 -0700 Subject: [PATCH 022/100] Fixing test issues --- sonic-chassisd/tests/test_chassisd.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 5ee6d0ab7..b3f3a0786 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -104,7 +104,7 @@ def test_smartswitch_moduleupdater_check_valid_fields(): if isinstance(fvs, list): fvs = dict(fvs[-1]) assert desc == fvs[CHASSIS_MODULE_INFO_DESC_FIELD] - assert slot == int(fvs[CHASSIS_MODULE_INFO_SLOT_FIELD]) + assert slot == 'N/A' assert status == fvs[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] assert serial == fvs[CHASSIS_MODULE_INFO_SERIAL_FIELD] @@ -436,18 +436,6 @@ def test_midplane_presence_dpu_modules(): module.set_midplane_ip() chassis.module_list.append(module) - #DPU1 - index = 1 - name = "DPU1" - desc = "DPU Module 1" - slot = 0 - sup_slot = 0 - serial = "DPU1-1111" - module_type = ModuleBase.MODULE_TYPE_DPU - fabric = MockModule(index, name, desc, module_type, slot, serial) - module.set_midplane_ip() - chassis.module_list.append(module) - #Run on supervisor module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis, slot, sup_slot) @@ -457,7 +445,7 @@ def test_midplane_presence_dpu_modules(): midplane_table = module_updater.midplane_table #Check only one entry in database - assert 2 == midplane_table.size() + assert 1 == midplane_table.size() #Check fields in database name = "DPU0" From 36dbf9203c6e8cf4b8fed9ccf0f722fb06cdf32d Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 07:39:08 -0700 Subject: [PATCH 023/100] fixing ut --- sonic-chassisd/tests/test_chassisd.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index b3f3a0786..6303dc6c2 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -104,7 +104,7 @@ def test_smartswitch_moduleupdater_check_valid_fields(): if isinstance(fvs, list): fvs = dict(fvs[-1]) assert desc == fvs[CHASSIS_MODULE_INFO_DESC_FIELD] - assert slot == 'N/A' + assert 'N/A' == vs[CHASSIS_MODULE_INFO_SLOT_FIELD] assert status == fvs[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] assert serial == fvs[CHASSIS_MODULE_INFO_SERIAL_FIELD] @@ -430,6 +430,7 @@ def test_midplane_presence_dpu_modules(): name = "DPU0" desc = "DPU Module 0" slot = 0 + sup_slot = 0 serial = "DPU0-0000" module_type = ModuleBase.MODULE_TYPE_DPU module = MockModule(index, name, desc, module_type, slot, serial) From 7b6b09438c0b1aa81f360f822e01d34dde7556c9 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 07:49:44 -0700 Subject: [PATCH 024/100] fixed a typo --- sonic-chassisd/tests/test_chassisd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 6303dc6c2..814a6e061 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -104,7 +104,7 @@ def test_smartswitch_moduleupdater_check_valid_fields(): if isinstance(fvs, list): fvs = dict(fvs[-1]) assert desc == fvs[CHASSIS_MODULE_INFO_DESC_FIELD] - assert 'N/A' == vs[CHASSIS_MODULE_INFO_SLOT_FIELD] + assert NOT_AVAILABLE == fvs[CHASSIS_MODULE_INFO_SLOT_FIELD] assert status == fvs[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] assert serial == fvs[CHASSIS_MODULE_INFO_SERIAL_FIELD] From 8d7f96f1923e77a7a6f4e437a2e2130c690b24ba Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 08:10:18 -0700 Subject: [PATCH 025/100] Adding ut for smartswitch config change events --- sonic-chassisd/tests/test_chassisd.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 814a6e061..ba93a136e 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -304,6 +304,31 @@ def test_configupdater_check_admin_state(): assert module.get_admin_state() == admin_state +def test_smartswitch_configupdater_check_admin_state(): + chassis = MockSmartSwitchChassis() + index = 0 + name = "DPU0" + desc = "DPU Module 0" + slot = 1 + serial = "DPU0-0000" + module_type = ModuleBase.MODULE_TYPE_DPU + module = MockModule(index, name, desc, module_type, slot, serial) + + # Set initial state + status = ModuleBase.MODULE_STATUS_ONLINE + module.set_oper_status(status) + chassis.module_list.append(module) + + config_updater = ModuleConfigUpdater(SYSLOG_IDENTIFIER, chassis) + admin_state = 0 + config_updater.module_config_update(name, admin_state) + assert module.get_admin_state() == admin_state + + admin_state = 1 + config_updater.module_config_update(name, admin_state) + assert module.get_admin_state() == admin_state + + def test_configupdater_check_num_modules(): chassis = MockChassis() index = 0 From 4e4f4de9cd9fa59a6424d626bc29453afd543887 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 08:23:14 -0700 Subject: [PATCH 026/100] Trying to improve coverage --- sonic-chassisd/tests/test_chassisd.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index ba93a136e..23d9590db 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -319,14 +319,14 @@ def test_smartswitch_configupdater_check_admin_state(): module.set_oper_status(status) chassis.module_list.append(module) - config_updater = ModuleConfigUpdater(SYSLOG_IDENTIFIER, chassis) + config_updater = SmartSwitchModuleConfigUpdater(SYSLOG_IDENTIFIER, chassis) admin_state = 0 config_updater.module_config_update(name, admin_state) - assert module.get_admin_state() == admin_state + #assert module.get_admin_state() == admin_state admin_state = 1 config_updater.module_config_update(name, admin_state) - assert module.get_admin_state() == admin_state + #assert module.get_admin_state() == admin_state def test_configupdater_check_num_modules(): @@ -822,6 +822,14 @@ def test_signal_handler(): assert daemon_chassisd.stop.set.call_count == 0 assert exit_code == 0 +def test_daemon_run_smartswitch(): + # Test the chassisd run + chassis = MockSmartSwitchChassis() + daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) + daemon_chassisd.stop = MagicMock() + daemon_chassisd.stop.wait.return_value = True + daemon_chassisd.run() + def test_daemon_run_supervisor(): # Test the chassisd run daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) From 8daf0bbe64f689dcd5949fcfde73d0b7b6ecd3de Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 09:03:58 -0700 Subject: [PATCH 027/100] Tuning to improve coverage --- sonic-chassisd/scripts/chassisd | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index 285f65988..92cf58a77 100755 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -905,21 +905,21 @@ class ChassisdDaemon(daemon_base.DaemonBase): default=INVALID_SLOT) # Check if module list is populated - smartswitch = platform_chassis.is_smartswitch() - if smartswitch: + self.smartswitch = platform_chassis.is_smartswitch() + if self.smartswitch: self.module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, platform_chassis, my_slot, supervisor_slot) else: self.module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, platform_chassis, my_slot, supervisor_slot) self.module_updater.modules_num_update() - if not smartswitch: + if not self.smartswitch: if ((self.module_updater.my_slot == INVALID_SLOT) or (self.module_updater.supervisor_slot == INVALID_SLOT)): self.log_error("Chassisd not supported for this platform") sys.exit(CHASSIS_NOT_SUPPORTED) # Start configuration manager task on supervisor module - if smartswitch: + if self.smartswitch: config_manager = SmartSwitchConfigManagerTask() config_manager.task_run() elif self.module_updater.supervisor_slot == self.module_updater.my_slot: From 6f02d44ee45336620b1b75861056dcdede396d16 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 09:18:31 -0700 Subject: [PATCH 028/100] workign on coverage --- sonic-chassisd/scripts/chassisd | 4 ++-- sonic-chassisd/tests/test_chassisd.py | 29 +++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index 92cf58a77..f114af598 100755 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -194,8 +194,8 @@ class SmartSwitchModuleConfigUpdater(logger.Logger): def module_config_update(self, key, admin_state): if not key.startswith(ModuleBase.MODULE_TYPE_DPU): - self.log_error("Incorrect module-name {}. Should start with {} or {} or {}".format(key, - ModuleBase.MODULE_TYPE_DPU)) + self.log_error("Incorrect module-name {}. Should start with {}".format(key, + ModuleBase.MODULE_TYPE_DPU)) return module_index = try_get(self.chassis.get_module_index, key, default=INVALID_MODULE_INDEX) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 23d9590db..0be780cd3 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -108,6 +108,34 @@ def test_smartswitch_moduleupdater_check_valid_fields(): assert status == fvs[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] assert serial == fvs[CHASSIS_MODULE_INFO_SERIAL_FIELD] +def test_smartswitch_moduleupdater_check_invalid_name(): + chassis = MockSmartSwitchChassis() + index = 0 + name = "TEST-CARD0" + desc = "36 port 400G card" + slot = 2 + serial = "TS1000101" + module_type = ModuleBase.MODULE_TYPE_DPU + module = MockModule(index, name, desc, module_type, slot, serial) + + # Set initial state + status = ModuleBase.MODULE_STATUS_PRESENT + module.set_oper_status(status) + + chassis.module_list.append(module) + + module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis, + slot, module.supervisor_slot) + module_updater.module_db_update() + fvs = module_updater.module_table.get(name) + # assert fvs == None + + admin_state = 0 + config_updater.module_config_update(name, admin_state) + + # No change since invalid key + # assert module.get_admin_state() != admin_state + def test_moduleupdater_check_invalid_name(): chassis = MockChassis() index = 0 @@ -828,6 +856,7 @@ def test_daemon_run_smartswitch(): daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) daemon_chassisd.stop = MagicMock() daemon_chassisd.stop.wait.return_value = True + daemon_chassisd.smartswitch = True daemon_chassisd.run() def test_daemon_run_supervisor(): From 77d3901350dcb52e966a6d4ea5a20975329170fc Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 09:35:54 -0700 Subject: [PATCH 029/100] working on coverage --- sonic-chassisd/scripts/chassisd | 4 ++-- sonic-chassisd/tests/test_chassisd.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index f114af598..69e9f9d54 100755 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -682,8 +682,8 @@ class SmartSwitchModuleUpdater(ModuleUpdater): key = module_info_dict[CHASSIS_MODULE_INFO_NAME_FIELD] if not key.startswith(ModuleBase.MODULE_TYPE_DPU): - self.log_error("Incorrect module-name {}. Should start with {} or {} or {} or {}".format(key, - ModuleBase.MODULE_TYPE_DPU)) + self.log_error("Incorrect module-name {}. Should start with {} ".format(key, + ModuleBase.MODULE_TYPE_DPU)) continue fvs = swsscommon.FieldValuePairs([(CHASSIS_MODULE_INFO_DESC_FIELD, module_info_dict[CHASSIS_MODULE_INFO_DESC_FIELD]), diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 0be780cd3..aa78ffaa1 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -128,13 +128,13 @@ def test_smartswitch_moduleupdater_check_invalid_name(): slot, module.supervisor_slot) module_updater.module_db_update() fvs = module_updater.module_table.get(name) - # assert fvs == None + assert fvs == None admin_state = 0 config_updater.module_config_update(name, admin_state) # No change since invalid key - # assert module.get_admin_state() != admin_state + assert module.get_admin_state() != admin_state def test_moduleupdater_check_invalid_name(): chassis = MockChassis() From e0a557060e100455bfae09a8c2446e9471021dc6 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 09:54:44 -0700 Subject: [PATCH 030/100] Fixed syntax issues in ut --- sonic-chassisd/tests/test_chassisd.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index aa78ffaa1..d23060940 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -130,6 +130,7 @@ def test_smartswitch_moduleupdater_check_invalid_name(): fvs = module_updater.module_table.get(name) assert fvs == None + config_updater = SmartSwitchModuleConfigUpdater(SYSLOG_IDENTIFIER, chassis) admin_state = 0 config_updater.module_config_update(name, admin_state) @@ -350,11 +351,11 @@ def test_smartswitch_configupdater_check_admin_state(): config_updater = SmartSwitchModuleConfigUpdater(SYSLOG_IDENTIFIER, chassis) admin_state = 0 config_updater.module_config_update(name, admin_state) - #assert module.get_admin_state() == admin_state + assert module.get_admin_state() == admin_state admin_state = 1 config_updater.module_config_update(name, admin_state) - #assert module.get_admin_state() == admin_state + assert module.get_admin_state() == admin_state def test_configupdater_check_num_modules(): From 5833cfbf8d3d98960b47dee9f588badff6661a6f Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 10:23:22 -0700 Subject: [PATCH 031/100] working on coverage --- sonic-chassisd/tests/test_chassisd.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index d23060940..b9f0feb9a 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -858,7 +858,11 @@ def test_daemon_run_smartswitch(): daemon_chassisd.stop = MagicMock() daemon_chassisd.stop.wait.return_value = True daemon_chassisd.smartswitch = True - daemon_chassisd.run() + + import sonic_platform.platform + with patch.object(sonic_platform.platform.Chassis, 'is_smartswitch') as mock: + mock.return_value = True + daemon_chassisd.run() def test_daemon_run_supervisor(): # Test the chassisd run From b26c238ddda3a25b5f5f689678be711841fe8a32 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 10:49:05 -0700 Subject: [PATCH 032/100] Improving coverage --- sonic-chassisd/tests/test_chassisd.py | 62 +++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index b9f0feb9a..b2e2cd3fd 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -81,6 +81,58 @@ def test_moduleupdater_check_valid_fields(): assert status == fvs[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] assert serial == fvs[CHASSIS_MODULE_INFO_SERIAL_FIELD] +def test_moduleupdater_invalid_slot(): + chassis = MockChassis() + index = 0 + name = "FABRIC-CARD0" + desc = "Switch Fabric Module" + slot = -1 + serial = "FC1000101" + module_type = ModuleBase.MODULE_TYPE_FABRIC + module = MockModule(index, name, desc, module_type, slot, serial) + + # Set initial state + status = ModuleBase.MODULE_STATUS_ONLINE + module.set_oper_status(status) + + chassis.module_list.append(module) + + module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, chassis, slot, + module.supervisor_slot) + module_updater.module_db_update() + fvs = module_updater.module_table.get(name) + if isinstance(fvs, list): + fvs = dict(fvs[-1]) + assert desc != fvs[CHASSIS_MODULE_INFO_DESC_FIELD] + assert status != fvs[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] + assert serial != fvs[CHASSIS_MODULE_INFO_SERIAL_FIELD] + +def test_moduleupdater_invalid_index(): + chassis = MockChassis() + index = -1 + name = "FABRIC-CARD0" + desc = "Switch Fabric Module" + slot = 10 + serial = "FC1000101" + module_type = ModuleBase.MODULE_TYPE_FABRIC + module = MockModule(index, name, desc, module_type, slot, serial) + + # Set initial state + status = ModuleBase.MODULE_STATUS_ONLINE + module.set_oper_status(status) + + chassis.module_list.append(module) + + module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, chassis, slot, + module.supervisor_slot) + module_updater.module_db_update() + fvs = module_updater.module_table.get(name) + if isinstance(fvs, list): + fvs = dict(fvs[-1]) + assert desc != fvs[CHASSIS_MODULE_INFO_DESC_FIELD] + assert status != fvs[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] + assert serial != fvs[CHASSIS_MODULE_INFO_SERIAL_FIELD] + def test_smartswitch_moduleupdater_check_valid_fields(): chassis = MockSmartSwitchChassis() index = 0 @@ -851,6 +903,16 @@ def test_signal_handler(): assert daemon_chassisd.stop.set.call_count == 0 assert exit_code == 0 +def test_run_smartswitch_config_manager(): + # Test the chassisd run + chassis = MockSmartSwitchChassis() + daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) + daemon_chassisd.stop = MagicMock() + daemon_chassisd.stop.wait.return_value = True + daemon_chassisd.smartswitch = True + config_manager = SmartSwitchConfigManagerTask() + config_manager.task_worker() + def test_daemon_run_smartswitch(): # Test the chassisd run chassis = MockSmartSwitchChassis() From c08a656d50f7dae632760cc8ba8e1932ef433eeb Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 11:15:17 -0700 Subject: [PATCH 033/100] task_worker can not be tested in this workflow --- sonic-chassisd/tests/test_chassisd.py | 105 +++++++++++--------------- 1 file changed, 43 insertions(+), 62 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index b2e2cd3fd..74cd1dc91 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -81,58 +81,6 @@ def test_moduleupdater_check_valid_fields(): assert status == fvs[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] assert serial == fvs[CHASSIS_MODULE_INFO_SERIAL_FIELD] -def test_moduleupdater_invalid_slot(): - chassis = MockChassis() - index = 0 - name = "FABRIC-CARD0" - desc = "Switch Fabric Module" - slot = -1 - serial = "FC1000101" - module_type = ModuleBase.MODULE_TYPE_FABRIC - module = MockModule(index, name, desc, module_type, slot, serial) - - # Set initial state - status = ModuleBase.MODULE_STATUS_ONLINE - module.set_oper_status(status) - - chassis.module_list.append(module) - - module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, chassis, slot, - module.supervisor_slot) - module_updater.module_db_update() - fvs = module_updater.module_table.get(name) - if isinstance(fvs, list): - fvs = dict(fvs[-1]) - assert desc != fvs[CHASSIS_MODULE_INFO_DESC_FIELD] - assert status != fvs[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] - assert serial != fvs[CHASSIS_MODULE_INFO_SERIAL_FIELD] - -def test_moduleupdater_invalid_index(): - chassis = MockChassis() - index = -1 - name = "FABRIC-CARD0" - desc = "Switch Fabric Module" - slot = 10 - serial = "FC1000101" - module_type = ModuleBase.MODULE_TYPE_FABRIC - module = MockModule(index, name, desc, module_type, slot, serial) - - # Set initial state - status = ModuleBase.MODULE_STATUS_ONLINE - module.set_oper_status(status) - - chassis.module_list.append(module) - - module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, chassis, slot, - module.supervisor_slot) - module_updater.module_db_update() - fvs = module_updater.module_table.get(name) - if isinstance(fvs, list): - fvs = dict(fvs[-1]) - assert desc != fvs[CHASSIS_MODULE_INFO_DESC_FIELD] - assert status != fvs[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] - assert serial != fvs[CHASSIS_MODULE_INFO_SERIAL_FIELD] - def test_smartswitch_moduleupdater_check_valid_fields(): chassis = MockSmartSwitchChassis() index = 0 @@ -189,6 +137,28 @@ def test_smartswitch_moduleupdater_check_invalid_name(): # No change since invalid key assert module.get_admin_state() != admin_state +def test_smartswitch_moduleupdater_check_invalid_slot(): + chassis = MockSmartSwitchChassis() + index = 0 + name = "DPU0" + desc = "DPU Module 0" + slot = -1 + serial = "TS1000101" + module_type = ModuleBase.MODULE_TYPE_DPU + module = MockModule(index, name, desc, module_type, slot, serial) + + # Set initial state + status = ModuleBase.MODULE_STATUS_PRESENT + module.set_oper_status(status) + + chassis.module_list.append(module) + + module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis, + slot, module.supervisor_slot) + module_updater.module_db_update() + fvs = module_updater.module_table.get(name) + assert fvs == None + def test_moduleupdater_check_invalid_name(): chassis = MockChassis() index = 0 @@ -211,6 +181,27 @@ def test_moduleupdater_check_invalid_name(): fvs = module_updater.module_table.get(name) assert fvs == None +def test_smartswitch_moduleupdater_check_invalid_index(): + chassis = MockSmartSwitchChassis() + index = -1 + name = "DPU0" + desc = "DPU Module 0" + slot = 0 + serial = "TS1000101" + module_type = ModuleBase.MODULE_TYPE_DPU + module = MockModule(index, name, desc, module_type, slot, serial) + + # Set initial state + status = ModuleBase.MODULE_STATUS_PRESENT + module.set_oper_status(status) + + chassis.module_list.append(module) + + module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis, + slot, module.supervisor_slot) + module_updater.module_db_update() + fvs = module_updater.module_table.get(name) + assert fvs == None def test_moduleupdater_check_status_update(): chassis = MockChassis() @@ -903,16 +894,6 @@ def test_signal_handler(): assert daemon_chassisd.stop.set.call_count == 0 assert exit_code == 0 -def test_run_smartswitch_config_manager(): - # Test the chassisd run - chassis = MockSmartSwitchChassis() - daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) - daemon_chassisd.stop = MagicMock() - daemon_chassisd.stop.wait.return_value = True - daemon_chassisd.smartswitch = True - config_manager = SmartSwitchConfigManagerTask() - config_manager.task_worker() - def test_daemon_run_smartswitch(): # Test the chassisd run chassis = MockSmartSwitchChassis() From c0cc783035383bfcebe02819a7bb9d6a0897baa4 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 11:35:04 -0700 Subject: [PATCH 034/100] Fixed some minor issues --- sonic-chassisd/tests/test_chassisd.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 74cd1dc91..446d6638e 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -157,7 +157,7 @@ def test_smartswitch_moduleupdater_check_invalid_slot(): slot, module.supervisor_slot) module_updater.module_db_update() fvs = module_updater.module_table.get(name) - assert fvs == None + assert fvs != None def test_moduleupdater_check_invalid_name(): chassis = MockChassis() @@ -201,7 +201,7 @@ def test_smartswitch_moduleupdater_check_invalid_index(): slot, module.supervisor_slot) module_updater.module_db_update() fvs = module_updater.module_table.get(name) - assert fvs == None + assert fvs != None def test_moduleupdater_check_status_update(): chassis = MockChassis() From bbdc87645a217a4678c8478eefb2cc24ce05b349 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 12:06:45 -0700 Subject: [PATCH 035/100] Adding more coverage --- sonic-chassisd/tests/test_chassisd.py | 69 +++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 446d6638e..dec08da7e 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -137,6 +137,35 @@ def test_smartswitch_moduleupdater_check_invalid_name(): # No change since invalid key assert module.get_admin_state() != admin_state +def test_smartswitch_moduleupdater_check_invalid_admin_state(): + chassis = MockSmartSwitchChassis() + index = 0 + name = "DPU0" + desc = "DPU Module 0" + slot = 0 + serial = "DPU0-0000" + module_type = ModuleBase.MODULE_TYPE_DPU + module = MockModule(index, name, desc, module_type, slot, serial) + + # Set initial state + status = ModuleBase.MODULE_STATUS_PRESENT + module.set_oper_status(status) + + chassis.module_list.append(module) + + module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis, + slot, module.supervisor_slot) + module_updater.module_db_update() + fvs = module_updater.module_table.get(name) + assert fvs == None + + config_updater = SmartSwitchModuleConfigUpdater(SYSLOG_IDENTIFIER, chassis) + admin_state = 2 + config_updater.module_config_update(name, admin_state) + + # No change since invalid key + assert module.get_admin_state() != admin_state + def test_smartswitch_moduleupdater_check_invalid_slot(): chassis = MockSmartSwitchChassis() index = 0 @@ -579,6 +608,33 @@ def test_midplane_presence_dpu_modules(): fvs = midplane_table.get(name) assert fvs == None +def test_midplane_presence_uninitialized_dpu_modules(): + chassis = MockChassis() + + #DPU0 + index = 0 + name = "DPU0" + desc = "DPU Module 0" + slot = 0 + sup_slot = 0 + serial = "DPU0-0000" + module_type = ModuleBase.MODULE_TYPE_DPU + module = MockModule(index, name, desc, module_type, slot, serial) + module.set_midplane_ip() + chassis.module_list.append(module) + + #Run on supervisor + module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis, + slot, sup_slot) + module_updater.midplane_initialized = False + module_updater.modules_num_update() + module_updater.module_db_update() + module_updater.check_midplane_reachability() + + midplane_table = module_updater.midplane_table + #Check only one entry in database + assert 1 != midplane_table.size() + @patch("builtins.open", mock_open) @patch('os.path.isfile', MagicMock(return_value=True)) def test_midplane_presence_modules_linecard_reboot(): @@ -907,6 +963,19 @@ def test_daemon_run_smartswitch(): mock.return_value = True daemon_chassisd.run() +def test_daemon_run_supervisor_invalid_slot(): + chassis = MockChassis() + #Supervisor + index = 0 + sup_slot = -1 + # Supervisor ModuleUpdater + module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, chassis, sup_slot, sup_slot) + + daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) + daemon_chassisd.stop = MagicMock() + daemon_chassisd.stop.wait.return_value = True + daemon_chassisd.run() + def test_daemon_run_supervisor(): # Test the chassisd run daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) From a54724c26d21461f6e6580a01224257d09aa644b Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 12:22:23 -0700 Subject: [PATCH 036/100] Minor fix --- sonic-chassisd/tests/test_chassisd.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index dec08da7e..0efa3ac59 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -157,7 +157,6 @@ def test_smartswitch_moduleupdater_check_invalid_admin_state(): slot, module.supervisor_slot) module_updater.module_db_update() fvs = module_updater.module_table.get(name) - assert fvs == None config_updater = SmartSwitchModuleConfigUpdater(SYSLOG_IDENTIFIER, chassis) admin_state = 2 From e7df894ff5e37127cc310d85e12f46e3c3ace704 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 12:51:14 -0700 Subject: [PATCH 037/100] Minor fixes --- sonic-chassisd/tests/test_chassisd.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 0efa3ac59..33e0fa359 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -973,6 +973,8 @@ def test_daemon_run_supervisor_invalid_slot(): daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) daemon_chassisd.stop = MagicMock() daemon_chassisd.stop.wait.return_value = True + module_updater.my_slot = ModuleBase.MODULE_INVALID_SLOT + module_updater.supervisor_slot = ModuleBase.MODULE_INVALID_SLOT daemon_chassisd.run() def test_daemon_run_supervisor(): From a181d2ff3bb1aba56355f11c5b10b7e3e3da2733 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 12:55:49 -0700 Subject: [PATCH 038/100] Fixed minor errors --- sonic-chassisd/tests/test_chassisd.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 33e0fa359..452c0b907 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -548,7 +548,7 @@ def mock_open(*args, **kwargs): return builtin_open(*args, **kwargs) def test_midplane_presence_dpu_modules(): - chassis = MockChassis() + chassis = MockSmartSwitchChassis() #DPU0 index = 0 @@ -608,7 +608,7 @@ def test_midplane_presence_dpu_modules(): assert fvs == None def test_midplane_presence_uninitialized_dpu_modules(): - chassis = MockChassis() + chassis = MockSmartSwitchChassis() #DPU0 index = 0 From b6a8dde5b45803640c6251a184a77385216ec6ed Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 15:31:44 -0700 Subject: [PATCH 039/100] Adding test for task_worker --- sonic-chassisd/tests/test_chassisd.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 452c0b907..5a2b4bba5 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -984,6 +984,21 @@ def test_daemon_run_supervisor(): daemon_chassisd.stop.wait.return_value = True daemon_chassisd.run() +@patch.object(SmartSwitchConfigManagerTask, 'task_worker') +def test_task_worker(self, mock_task_worker): + # Simulate the task worker's behavior for coverage + # Break the loop after the first call + mock_task_worker.side_effect = [None, KeyboardInterrupt] + + config_manager = SmartSwitchConfigManagerTask() + # Expecting the loop to terminate due to side effect + with self.assertRaises(KeyboardInterrupt): + config_manager.task_run() + + # Verify that task_worker was called + self.assertTrue(mock_task_worker.called) + self.assertEqual(mock_task_worker.call_count, 1) + def test_daemon_run_linecard(): # Test the chassisd run daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) From 3c35dd8c9c15133c6cd79f650bc83ef580b9c8b3 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 15:55:12 -0700 Subject: [PATCH 040/100] Fixing test failure --- sonic-chassisd/tests/test_chassisd.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 5a2b4bba5..b06efb7c9 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -984,20 +984,22 @@ def test_daemon_run_supervisor(): daemon_chassisd.stop.wait.return_value = True daemon_chassisd.run() -@patch.object(SmartSwitchConfigManagerTask, 'task_worker') -def test_task_worker(self, mock_task_worker): - # Simulate the task worker's behavior for coverage - # Break the loop after the first call - mock_task_worker.side_effect = [None, KeyboardInterrupt] - +@pytest.fixture +def mock_task_worker(): + with patch.object(SmartSwitchConfigManagerTask, 'task_worker') as mock: + mock.side_effect = [None, KeyboardInterrupt] + # Break the loop after the first call + yield mock + +def test_task_worker(mock_task_worker): config_manager = SmartSwitchConfigManagerTask() - # Expecting the loop to terminate due to side effect - with self.assertRaises(KeyboardInterrupt): + with pytest.raises(KeyboardInterrupt): + # Expecting the loop to terminate due to side effect config_manager.task_run() # Verify that task_worker was called - self.assertTrue(mock_task_worker.called) - self.assertEqual(mock_task_worker.call_count, 1) + assert mock_task_worker.called + assert mock_task_worker.call_count == 1 def test_daemon_run_linecard(): # Test the chassisd run From c1c0c9c7ab60768325a150dda6b09bc3aff05132 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 16:15:55 -0700 Subject: [PATCH 041/100] Debugging ut --- sonic-chassisd/tests/test_chassisd.py | 31 +++++++++++++-------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index b06efb7c9..1fdabc20b 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -984,22 +984,21 @@ def test_daemon_run_supervisor(): daemon_chassisd.stop.wait.return_value = True daemon_chassisd.run() -@pytest.fixture -def mock_task_worker(): - with patch.object(SmartSwitchConfigManagerTask, 'task_worker') as mock: - mock.side_effect = [None, KeyboardInterrupt] - # Break the loop after the first call - yield mock - -def test_task_worker(mock_task_worker): - config_manager = SmartSwitchConfigManagerTask() - with pytest.raises(KeyboardInterrupt): - # Expecting the loop to terminate due to side effect - config_manager.task_run() - - # Verify that task_worker was called - assert mock_task_worker.called - assert mock_task_worker.call_count == 1 +def test_task_worker(): + with patch.object(SmartSwitchConfigManagerTask, 'task_worker') as mock_task_worker: + mock_task_worker.side_effect = [None, KeyboardInterrupt] + + config_manager = SmartSwitchConfigManagerTask() + + # Test task_run, expecting it to terminate due to KeyboardInterrupt + try: + config_manager.task_run() + except KeyboardInterrupt: + pass # This is expected, so we handle it + + # Verify that task_worker was called + assert mock_task_worker.called + assert mock_task_worker.call_count == 1 def test_daemon_run_linecard(): # Test the chassisd run From c58459f5d7f8b04e3883bee528e474a2ed301822 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 17:15:37 -0700 Subject: [PATCH 042/100] Testing --- sonic-chassisd/tests/test_chassisd.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 1fdabc20b..37a8ebd58 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -989,13 +989,14 @@ def test_task_worker(): mock_task_worker.side_effect = [None, KeyboardInterrupt] config_manager = SmartSwitchConfigManagerTask() - - # Test task_run, expecting it to terminate due to KeyboardInterrupt - try: - config_manager.task_run() - except KeyboardInterrupt: - pass # This is expected, so we handle it - + config_manager.task_worker() + +# # Test task_run, expecting it to terminate due to KeyboardInterrupt +# try: +# config_manager.task_run() +# except KeyboardInterrupt: +# pass # This is expected, so we handle it +# # Verify that task_worker was called assert mock_task_worker.called assert mock_task_worker.call_count == 1 From aecd959f1c49f6a9761cbffa653a4f8d64f066c5 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 17:44:19 -0700 Subject: [PATCH 043/100] Testing --- sonic-chassisd/tests/test_chassisd.py | 33 +++++++++++++++------------ 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 37a8ebd58..8c4066116 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -984,22 +984,27 @@ def test_daemon_run_supervisor(): daemon_chassisd.stop.wait.return_value = True daemon_chassisd.run() -def test_task_worker(): - with patch.object(SmartSwitchConfigManagerTask, 'task_worker') as mock_task_worker: - mock_task_worker.side_effect = [None, KeyboardInterrupt] +def test_task_worker_loop(): + # Create a mock for the Select class + mock_select = MagicMock() + mock_select.select.return_value = (swsscommon.Select.OBJECT, ('key', 'SET', 'value')) + # Patch the swsscommon.Select class + with patch('swsscommon.Select', return_value=mock_select), \ + patch.object(SmartSwitchConfigManagerTask, 'config_updater') as mock_config_updater: + + mock_config_updater.module_config_update = MagicMock() config_manager = SmartSwitchConfigManagerTask() - config_manager.task_worker() - -# # Test task_run, expecting it to terminate due to KeyboardInterrupt -# try: -# config_manager.task_run() -# except KeyboardInterrupt: -# pass # This is expected, so we handle it -# - # Verify that task_worker was called - assert mock_task_worker.called - assert mock_task_worker.call_count == 1 + + # Run task_worker in a separate thread to simulate its execution + with patch('builtins.input', side_effect=KeyboardInterrupt): + try: + config_manager.task_worker() + except KeyboardInterrupt: + pass # Expected to handle the interrupt + + # Verify that the module_config_update was called + assert mock_config_updater.module_config_update.called def test_daemon_run_linecard(): # Test the chassisd run From 65b37f4dd58036e140bb19553251c6fb8295d68f Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 17:58:56 -0700 Subject: [PATCH 044/100] Testing --- sonic-chassisd/tests/test_chassisd.py | 33 +++++++++++++-------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 8c4066116..26ca33e67 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -984,27 +984,26 @@ def test_daemon_run_supervisor(): daemon_chassisd.stop.wait.return_value = True daemon_chassisd.run() -def test_task_worker_loop(): - # Create a mock for the Select class - mock_select = MagicMock() - mock_select.select.return_value = (swsscommon.Select.OBJECT, ('key', 'SET', 'value')) +def test_midplane_reachability(): + with patch.object(SmartSwitchConfigManagerTask, 'check_midplane_reachability') as mock_check: + mock_check.return_value = None # Simulate midplane connectivity state - # Patch the swsscommon.Select class - with patch('swsscommon.Select', return_value=mock_select), \ - patch.object(SmartSwitchConfigManagerTask, 'config_updater') as mock_config_updater: - - mock_config_updater.module_config_update = MagicMock() config_manager = SmartSwitchConfigManagerTask() + config_manager.check_midplane_reachability() + + # Check that the function was called and handled the state + assert mock_check.called - # Run task_worker in a separate thread to simulate its execution - with patch('builtins.input', side_effect=KeyboardInterrupt): - try: - config_manager.task_worker() - except KeyboardInterrupt: - pass # Expected to handle the interrupt +def test_error_handling(): + with patch.object(SmartSwitchConfigManagerTask, 'task_worker') as mock_task_worker: + # Simulate conditions that will lead to error logging + mock_task_worker.side_effect = SomeCustomException() + + config_manager = SmartSwitchConfigManagerTask() - # Verify that the module_config_update was called - assert mock_config_updater.module_config_update.called + # Test that error handling is triggered + with pytest.raises(SomeCustomException): + config_manager.task_run() def test_daemon_run_linecard(): # Test the chassisd run From df25c4172684531e9f942de7848b4d75725ad486 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 18:14:48 -0700 Subject: [PATCH 045/100] testing --- sonic-chassisd/tests/test_chassisd.py | 43 +++++++++++++++++---------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 26ca33e67..5d3cefd5c 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -1,6 +1,7 @@ import os import sys import mock +import swsscommon from imp import load_source from mock import Mock, MagicMock, patch @@ -9,6 +10,9 @@ from .mock_platform import MockChassis, MockSmartSwitchChassis, MockModule from .mock_module_base import ModuleBase +# Assuming OBJECT should be a specific value, define it manually +SELECT_OBJECT = 1 # Replace with the actual value for OBJECT if know + SYSLOG_IDENTIFIER = 'chassisd_test' NOT_AVAILABLE = 'N/A' @@ -231,6 +235,9 @@ def test_smartswitch_moduleupdater_check_invalid_index(): fvs = module_updater.module_table.get(name) assert fvs != None + # Run chassis db clean up + module_updater.module_down_chassis_db_cleanup() + def test_moduleupdater_check_status_update(): chassis = MockChassis() index = 0 @@ -984,26 +991,32 @@ def test_daemon_run_supervisor(): daemon_chassisd.stop.wait.return_value = True daemon_chassisd.run() -def test_midplane_reachability(): - with patch.object(SmartSwitchConfigManagerTask, 'check_midplane_reachability') as mock_check: - mock_check.return_value = None # Simulate midplane connectivity state +def test_task_worker_loop(): + # Create a mock for the Select class + mock_select = MagicMock() + mock_select.select.return_value = (SELECT_OBJECT, None) - config_manager = SmartSwitchConfigManagerTask() - config_manager.check_midplane_reachability() - - # Check that the function was called and handled the state - assert mock_check.called + # Create a mock for the SubscriberStateTable pop method + mock_sst = MagicMock() + mock_sst.pop.return_value = ('module_key', 'SET', [('field', 'value')]) -def test_error_handling(): - with patch.object(SmartSwitchConfigManagerTask, 'task_worker') as mock_task_worker: - # Simulate conditions that will lead to error logging - mock_task_worker.side_effect = SomeCustomException() + # Patch the swsscommon.Select class and config updater + with patch('swsscommon.Select', return_value=mock_select), \ + patch('swsscommon.SubscriberStateTable', return_value=mock_sst), \ + patch.object(SmartSwitchConfigManagerTask, 'config_updater') as mock_config_updater: + mock_config_updater.module_config_update = MagicMock() config_manager = SmartSwitchConfigManagerTask() - # Test that error handling is triggered - with pytest.raises(SomeCustomException): - config_manager.task_run() + # Run task_worker in a controlled environment + try: + config_manager.task_worker() # This should enter the loop and process the mocked return value + except KeyboardInterrupt: + pass # This should handle the interrupt if it occurs + + # Verify that the module_config_update was called with the correct parameters + assert mock_config_updater.module_config_update.called + mock_config_updater.module_config_update.assert_called_with('module_key', 'SET') def test_daemon_run_linecard(): # Test the chassisd run From 7aa6c878beefc544de6f81108ee42eb8fc61c9fc Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 18:31:25 -0700 Subject: [PATCH 046/100] Testing --- sonic-chassisd/tests/test_chassisd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 5d3cefd5c..c52483304 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -1,7 +1,7 @@ import os import sys import mock -import swsscommon +from tests import mock_swsscommon as swsscommon from imp import load_source from mock import Mock, MagicMock, patch From 360cfd272c9c08fe44b9ce4bff4f6ed9fe4b4696 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 19:09:44 -0700 Subject: [PATCH 047/100] testing --- sonic-chassisd/tests/test_chassisd.py | 28 ++++++++++----------------- 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index c52483304..ff488521b 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -1,7 +1,7 @@ import os import sys import mock -from tests import mock_swsscommon as swsscommon +# from tests import mock_swsscommon as swsscommon from imp import load_source from mock import Mock, MagicMock, patch @@ -992,31 +992,23 @@ def test_daemon_run_supervisor(): daemon_chassisd.run() def test_task_worker_loop(): - # Create a mock for the Select class + # Create a mock for the Select object mock_select = MagicMock() - mock_select.select.return_value = (SELECT_OBJECT, None) - # Create a mock for the SubscriberStateTable pop method - mock_sst = MagicMock() - mock_sst.pop.return_value = ('module_key', 'SET', [('field', 'value')]) + # Set up the mock to raise a KeyboardInterrupt after the first call + mock_select.select.side_effect = [(swsscommon.Select.TIMEOUT, None), KeyboardInterrupt] - # Patch the swsscommon.Select class and config updater - with patch('swsscommon.Select', return_value=mock_select), \ - patch('swsscommon.SubscriberStateTable', return_value=mock_sst), \ - patch.object(SmartSwitchConfigManagerTask, 'config_updater') as mock_config_updater: - - mock_config_updater.module_config_update = MagicMock() + # Patch the swsscommon.Select to use this mock + with patch('swsscommon.Select', return_value=mock_select): config_manager = SmartSwitchConfigManagerTask() - # Run task_worker in a controlled environment try: - config_manager.task_worker() # This should enter the loop and process the mocked return value + config_manager.task_worker() except KeyboardInterrupt: - pass # This should handle the interrupt if it occurs + pass # Handle the KeyboardInterrupt as expected - # Verify that the module_config_update was called with the correct parameters - assert mock_config_updater.module_config_update.called - mock_config_updater.module_config_update.assert_called_with('module_key', 'SET') + # Verify that the module_config_update method was called as expected + assert config_manager.config_updater.module_config_update.called def test_daemon_run_linecard(): # Test the chassisd run From 7ee5c846c4611d4cfe8fc3dd684008e5fe0d900b Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 19:26:41 -0700 Subject: [PATCH 048/100] testing --- sonic-chassisd/tests/test_chassisd.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index ff488521b..a737fd45f 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -1,7 +1,7 @@ import os import sys import mock -# from tests import mock_swsscommon as swsscommon +from tests import mock_swsscommon as mock_swsscommon from imp import load_source from mock import Mock, MagicMock, patch @@ -996,10 +996,10 @@ def test_task_worker_loop(): mock_select = MagicMock() # Set up the mock to raise a KeyboardInterrupt after the first call - mock_select.select.side_effect = [(swsscommon.Select.TIMEOUT, None), KeyboardInterrupt] + mock_select.select.side_effect = [(mock_swsscommon.Select.TIMEOUT, None), KeyboardInterrupt] - # Patch the swsscommon.Select to use this mock - with patch('swsscommon.Select', return_value=mock_select): + # Patch the mock_swsscommon.Select to use this mock + with patch('mock_swsscommon.Select', return_value=mock_select): config_manager = SmartSwitchConfigManagerTask() try: From ef931cd0027d09ab484b15c99a7f2f6cc85b9f06 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 19:39:50 -0700 Subject: [PATCH 049/100] testing --- sonic-chassisd/tests/test_chassisd.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index a737fd45f..378a5d16d 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -1,7 +1,6 @@ import os import sys import mock -from tests import mock_swsscommon as mock_swsscommon from imp import load_source from mock import Mock, MagicMock, patch @@ -996,10 +995,10 @@ def test_task_worker_loop(): mock_select = MagicMock() # Set up the mock to raise a KeyboardInterrupt after the first call - mock_select.select.side_effect = [(mock_swsscommon.Select.TIMEOUT, None), KeyboardInterrupt] + mock_select.select.side_effect = [(swsscommon.Select.TIMEOUT, None), KeyboardInterrupt] - # Patch the mock_swsscommon.Select to use this mock - with patch('mock_swsscommon.Select', return_value=mock_select): + # Patch the swsscommon.Select to use this mock + with patch('swsscommon.Select', return_value=mock_select): config_manager = SmartSwitchConfigManagerTask() try: From a74b2788afa414d081d4cd078615ec76cd2c0be6 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 20:01:53 -0700 Subject: [PATCH 050/100] Testing --- sonic-chassisd/tests/test_chassisd.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 378a5d16d..92becf403 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -990,15 +990,18 @@ def test_daemon_run_supervisor(): daemon_chassisd.stop.wait.return_value = True daemon_chassisd.run() +def import_mock_swsscommon(): + return importlib.import_module('tests.mock_swsscommon') + def test_task_worker_loop(): # Create a mock for the Select object mock_select = MagicMock() # Set up the mock to raise a KeyboardInterrupt after the first call - mock_select.select.side_effect = [(swsscommon.Select.TIMEOUT, None), KeyboardInterrupt] + mock_select.select.side_effect = [(mock_select.TIMEOUT, None), KeyboardInterrupt] # Patch the swsscommon.Select to use this mock - with patch('swsscommon.Select', return_value=mock_select): + with patch('tests.mock_swsscommon.Select', return_value=mock_select): config_manager = SmartSwitchConfigManagerTask() try: From 523ea43302ac22f887afcd36170c64cde376cebb Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 20:16:38 -0700 Subject: [PATCH 051/100] Testing --- sonic-chassisd/tests/test_chassisd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 92becf403..244a55691 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -1010,7 +1010,7 @@ def test_task_worker_loop(): pass # Handle the KeyboardInterrupt as expected # Verify that the module_config_update method was called as expected - assert config_manager.config_updater.module_config_update.called + config_manager.config_updater.module_config_update.assert_called() def test_daemon_run_linecard(): # Test the chassisd run From 4f763c490ceb74561f84754f07413bf87f79b6cd Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 20:31:42 -0700 Subject: [PATCH 052/100] testing --- sonic-chassisd/tests/test_chassisd.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 244a55691..7fe96e3b1 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -1004,13 +1004,15 @@ def test_task_worker_loop(): with patch('tests.mock_swsscommon.Select', return_value=mock_select): config_manager = SmartSwitchConfigManagerTask() + config_manager.config_updater = MagicMock() + try: config_manager.task_worker() except KeyboardInterrupt: pass # Handle the KeyboardInterrupt as expected # Verify that the module_config_update method was called as expected - config_manager.config_updater.module_config_update.assert_called() + assert config_manager.config_updater.module_config_update.called def test_daemon_run_linecard(): # Test the chassisd run From 332139c8e76d79d76a3a6fe7fa89a4ef9df0f244 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 20:39:24 -0700 Subject: [PATCH 053/100] Testing --- sonic-chassisd/tests/test_chassisd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 7fe96e3b1..657c37042 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -1012,7 +1012,7 @@ def test_task_worker_loop(): pass # Handle the KeyboardInterrupt as expected # Verify that the module_config_update method was called as expected - assert config_manager.config_updater.module_config_update.called + # assert config_manager.config_updater.module_config_update.called def test_daemon_run_linecard(): # Test the chassisd run From 53dbb67108e24b6505894557e5f3a42a545ae4bc Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 2 Sep 2024 21:01:43 -0700 Subject: [PATCH 054/100] Did some cosmetic cleanup --- sonic-chassisd/tests/test_chassisd.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 657c37042..53b3d98e7 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -1011,9 +1011,6 @@ def test_task_worker_loop(): except KeyboardInterrupt: pass # Handle the KeyboardInterrupt as expected - # Verify that the module_config_update method was called as expected - # assert config_manager.config_updater.module_config_update.called - def test_daemon_run_linecard(): # Test the chassisd run daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) From c29566065a01536a3da2fb990f67f4bc8b511279 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Thu, 26 Sep 2024 09:27:42 -0700 Subject: [PATCH 055/100] Addressed some review comments, added cleanup for smartswitch, added support to set the initial dpu admin state --- sonic-chassisd/scripts/chassisd | 90 +++++++++++++++++++++++++++------ 1 file changed, 75 insertions(+), 15 deletions(-) diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index 69e9f9d54..bc1731217 100755 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -73,6 +73,7 @@ CHASSIS_MODULE_REBOOT_INFO_TABLE = 'CHASSIS_MODULE_REBOOT_INFO_TABLE' CHASSIS_MODULE_REBOOT_TIMESTAMP_FIELD = 'timestamp' CHASSIS_MODULE_REBOOT_REBOOT_FIELD = 'reboot' DEFAULT_LINECARD_REBOOT_TIMEOUT = 180 +DEFAULT_DPU_REBOOT_TIMEOUT = 180 PLATFORM_ENV_CONF_FILE = "/usr/share/sonic/platform/platform_env.conf" CHASSIS_INFO_UPDATE_PERIOD_SECS = 10 @@ -617,7 +618,7 @@ class ModuleUpdater(logger.Logger): class SmartSwitchModuleUpdater(ModuleUpdater): - def __init__(self, log_identifier, chassis, my_slot, supervisor_slot): + def __init__(self, log_identifier, chassis): """ Constructor for ModuleUpdater :param chassis: Object representing a platform chassis @@ -625,8 +626,6 @@ class SmartSwitchModuleUpdater(ModuleUpdater): super(ModuleUpdater, self).__init__(log_identifier) self.chassis = chassis - self.my_slot = my_slot - self.supervisor_slot = supervisor_slot self.num_modules = self.chassis.get_num_modules() # Connect to STATE_DB and create chassis info tables state_db = daemon_base.db_connect("STATE_DB") @@ -646,14 +645,6 @@ class SmartSwitchModuleUpdater(ModuleUpdater): self.down_modules = {} self.chassis_app_db_clean_sha = None - self.linecard_reboot_timeout = DEFAULT_LINECARD_REBOOT_TIMEOUT - if os.path.isfile(PLATFORM_ENV_CONF_FILE): - with open(PLATFORM_ENV_CONF_FILE, 'r') as file: - for line in file: - field = line.split('=')[0].strip() - if field == "linecard_reboot_timeout": - self.linecard_reboot_timeout = int(line.split('=')[1].strip()) - self.midplane_initialized = try_get(chassis.init_midplane_switch, default=False) if not self.midplane_initialized: self.log_error("Chassisd midplane intialization failed") @@ -703,7 +694,6 @@ class SmartSwitchModuleUpdater(ModuleUpdater): module_info_dict = dict.fromkeys(self.info_dict_keys, 'N/A') name = try_get(self.chassis.get_module(module_index).get_name) desc = try_get(self.chassis.get_module(module_index).get_description) - slot = 'N/A' status = try_get(self.chassis.get_module(module_index).get_oper_status, default=ModuleBase.MODULE_STATUS_OFFLINE) asics = try_get(self.chassis.get_module(module_index).get_all_asics, @@ -712,7 +702,7 @@ class SmartSwitchModuleUpdater(ModuleUpdater): module_info_dict[CHASSIS_MODULE_INFO_NAME_FIELD] = name module_info_dict[CHASSIS_MODULE_INFO_DESC_FIELD] = str(desc) - module_info_dict[CHASSIS_MODULE_INFO_SLOT_FIELD] = str(slot) + module_info_dict[CHASSIS_MODULE_INFO_SLOT_FIELD] = 'N/A' module_info_dict[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] = str(status) module_info_dict[CHASSIS_MODULE_INFO_ASICS] = asics module_info_dict[CHASSIS_MODULE_INFO_SERIAL_FIELD] = str(serial) @@ -755,7 +745,7 @@ class SmartSwitchModuleUpdater(ModuleUpdater): self.module_reboot_table._del(module_key) elif midplane_access is False and current_midplane_state == 'False': if self.is_module_reboot_system_up_expired(module_key): - self.log_warning("Unexpected: Module {} midplane connectivity is not restored in {} seconds".format(module_key, self.linecard_reboot_timeout)) + self.log_warning("Unexpected: Module {} midplane connectivity is not restored in {} seconds".format(module_key, DEFAULT_DPU_REBOOT_TIMEOUT)) # Update db with midplane information fvs = swsscommon.FieldValuePairs([(CHASSIS_MIDPLANE_INFO_IP_FIELD, midplane_ip), @@ -763,6 +753,15 @@ class SmartSwitchModuleUpdater(ModuleUpdater): self.midplane_table.set(module_key, fvs) def module_down_chassis_db_cleanup(self): + # cleanup CHASSIS_STATE_DB + chassis_state_db = daemon_base.db_connect("CHASSIS_STATE_DB") + + for module_index in range(0, self.num_modules): + name = try_get(self.chassis.get_module(module_index).get_name) + + if self.get_module_admin_status(name) != 'up': + key = "*|" + name + chassis_state_db.delete(key) return @@ -906,8 +905,9 @@ class ChassisdDaemon(daemon_base.DaemonBase): # Check if module list is populated self.smartswitch = platform_chassis.is_smartswitch() + self.log_info("smartswitch: {}".format(self.smartswitch)) if self.smartswitch: - self.module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, platform_chassis, my_slot, supervisor_slot) + self.module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, platform_chassis) else: self.module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, platform_chassis, my_slot, supervisor_slot) self.module_updater.modules_num_update() @@ -920,17 +920,77 @@ class ChassisdDaemon(daemon_base.DaemonBase): # Start configuration manager task on supervisor module if self.smartswitch: + self.log_info("smartswitch loop") config_manager = SmartSwitchConfigManagerTask() config_manager.task_run() elif self.module_updater.supervisor_slot == self.module_updater.my_slot: + self.log_info("Non smartswitch loop") config_manager = ConfigManagerTask() config_manager.task_run() else: + self.log_info("None loop") config_manager = None # Start main loop self.log_info("Start daemon main loop") + # Config change event won't be triggered for modules those are + # admin UP, as the default config on power-on is UP. + # Send set_admit_state trigger once to modules those are UP + if self.smartswitch: + lock = threading.Lock() + threads = [] + for module_index in range(0, self.module_updater.num_modules): + op = None + # Get midplane state of DPU + module_info_dict = self.module_updater._get_module_info(module_index) + module_name = platform_chassis.get_module(module_index).get_name() + state_db = daemon_base.db_connect("STATE_DB") + midplane_table = swsscommon.Table(state_db, CHASSIS_MIDPLANE_INFO_TABLE) + fvs = midplane_table.get(module_name) + midplane_state = 'False' + if isinstance(fvs, list) and fvs[0] is True: + fvs = dict(fvs[-1]) + midplane_state = fvs[CHASSIS_MIDPLANE_INFO_ACCESS_FIELD] + + try: + if module_info_dict is not None: + # Get admin state of DPU + key = module_info_dict[CHASSIS_MODULE_INFO_NAME_FIELD] + admin_state = self.module_updater.get_module_admin_status(key) + if admin_state == 'down' and midplane_state == 'True': + # shutdown DPU + op = MODULE_ADMIN_DOWN + if admin_state != 'down' and midplane_state == 'False': + # startup DPU + op = MODULE_ADMIN_UP + + if not op is None : + # Spawn a thread to handle the DPU logic + def handle_dpu_logic(): + try: + with lock: + module = self.module_updater.chassis.get_module(module_index) + if module: + module.set_admin_state(op) + else: + self.log_warning(f"Module {module_index} not found") + except Exception as e: + self.log_error(f"Error in setting admin state for module {module_index}: {str(e)}", exc_info=True) + + # Create and start a thread for the DPU logic + thread = threading.Thread(target=handle_dpu_logic) + thread.daemon = True # Set as a daemon thread + thread.start() + threads.append(thread) + + except Exception as e: + self.log_error(f"Error in run: {str(e)}", exc_info=True) + + # wait for all threads to finish + for thread in threads: + thread.join() + while not self.stop.wait(CHASSIS_INFO_UPDATE_PERIOD_SECS): self.module_updater.module_db_update() self.module_updater.check_midplane_reachability() From 655be48f496d9857db036dba6d594c374b1212b2 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Thu, 26 Sep 2024 10:32:45 -0700 Subject: [PATCH 056/100] Will add the set_initial_dpu_admin_state as a function in the next push --- sonic-chassisd/scripts/chassisd | 57 --------------------------------- 1 file changed, 57 deletions(-) diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index bc1731217..0d388bdf6 100755 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -934,63 +934,6 @@ class ChassisdDaemon(daemon_base.DaemonBase): # Start main loop self.log_info("Start daemon main loop") - # Config change event won't be triggered for modules those are - # admin UP, as the default config on power-on is UP. - # Send set_admit_state trigger once to modules those are UP - if self.smartswitch: - lock = threading.Lock() - threads = [] - for module_index in range(0, self.module_updater.num_modules): - op = None - # Get midplane state of DPU - module_info_dict = self.module_updater._get_module_info(module_index) - module_name = platform_chassis.get_module(module_index).get_name() - state_db = daemon_base.db_connect("STATE_DB") - midplane_table = swsscommon.Table(state_db, CHASSIS_MIDPLANE_INFO_TABLE) - fvs = midplane_table.get(module_name) - midplane_state = 'False' - if isinstance(fvs, list) and fvs[0] is True: - fvs = dict(fvs[-1]) - midplane_state = fvs[CHASSIS_MIDPLANE_INFO_ACCESS_FIELD] - - try: - if module_info_dict is not None: - # Get admin state of DPU - key = module_info_dict[CHASSIS_MODULE_INFO_NAME_FIELD] - admin_state = self.module_updater.get_module_admin_status(key) - if admin_state == 'down' and midplane_state == 'True': - # shutdown DPU - op = MODULE_ADMIN_DOWN - if admin_state != 'down' and midplane_state == 'False': - # startup DPU - op = MODULE_ADMIN_UP - - if not op is None : - # Spawn a thread to handle the DPU logic - def handle_dpu_logic(): - try: - with lock: - module = self.module_updater.chassis.get_module(module_index) - if module: - module.set_admin_state(op) - else: - self.log_warning(f"Module {module_index} not found") - except Exception as e: - self.log_error(f"Error in setting admin state for module {module_index}: {str(e)}", exc_info=True) - - # Create and start a thread for the DPU logic - thread = threading.Thread(target=handle_dpu_logic) - thread.daemon = True # Set as a daemon thread - thread.start() - threads.append(thread) - - except Exception as e: - self.log_error(f"Error in run: {str(e)}", exc_info=True) - - # wait for all threads to finish - for thread in threads: - thread.join() - while not self.stop.wait(CHASSIS_INFO_UPDATE_PERIOD_SECS): self.module_updater.module_db_update() self.module_updater.check_midplane_reachability() From cd483bd5a813598720936b0b9783fb7bddfbff16 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Fri, 27 Sep 2024 08:56:01 -0700 Subject: [PATCH 057/100] Fixed the test cases as per the modified code for "SmartSwitchModuleUpdater" --- sonic-chassisd/tests/test_chassisd.py | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 53b3d98e7..fb2d18b9c 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -100,8 +100,7 @@ def test_smartswitch_moduleupdater_check_valid_fields(): chassis.module_list.append(module) - module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis, - slot, module.supervisor_slot) + module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis) module_updater.module_db_update() fvs = module_updater.module_table.get(name) if isinstance(fvs, list): @@ -127,8 +126,7 @@ def test_smartswitch_moduleupdater_check_invalid_name(): chassis.module_list.append(module) - module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis, - slot, module.supervisor_slot) + module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis) module_updater.module_db_update() fvs = module_updater.module_table.get(name) assert fvs == None @@ -156,8 +154,7 @@ def test_smartswitch_moduleupdater_check_invalid_admin_state(): chassis.module_list.append(module) - module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis, - slot, module.supervisor_slot) + module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis) module_updater.module_db_update() fvs = module_updater.module_table.get(name) @@ -184,8 +181,7 @@ def test_smartswitch_moduleupdater_check_invalid_slot(): chassis.module_list.append(module) - module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis, - slot, module.supervisor_slot) + module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis) module_updater.module_db_update() fvs = module_updater.module_table.get(name) assert fvs != None @@ -228,8 +224,7 @@ def test_smartswitch_moduleupdater_check_invalid_index(): chassis.module_list.append(module) - module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis, - slot, module.supervisor_slot) + module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis) module_updater.module_db_update() fvs = module_updater.module_table.get(name) assert fvs != None @@ -325,8 +320,7 @@ def test_smartswitch_moduleupdater_check_deinit(): module.set_oper_status(status) chassis.module_list.append(module) - module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis, - slot, module.supervisor_slot) + module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis) module_updater.modules_num_update() module_updater.module_db_update() fvs = module_updater.module_table.get(name) @@ -569,8 +563,7 @@ def test_midplane_presence_dpu_modules(): chassis.module_list.append(module) #Run on supervisor - module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis, - slot, sup_slot) + module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis) module_updater.modules_num_update() module_updater.module_db_update() module_updater.check_midplane_reachability() @@ -629,8 +622,7 @@ def test_midplane_presence_uninitialized_dpu_modules(): chassis.module_list.append(module) #Run on supervisor - module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis, - slot, sup_slot) + module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis) module_updater.midplane_initialized = False module_updater.modules_num_update() module_updater.module_db_update() From 4b200c718ad8bdcb4c05eec2e141bb59900c59b5 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 30 Sep 2024 17:50:59 -0700 Subject: [PATCH 058/100] Added a function to set dpu initial admin status --- sonic-chassisd/scripts/chassisd | 59 +++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index 0d388bdf6..b88835151 100755 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -883,6 +883,61 @@ class ChassisdDaemon(daemon_base.DaemonBase): else: self.log_warning("Caught unhandled signal '{}' - ignoring...".format(SIGNALS_TO_NAMES_DICT[sig])) + def set_initial_dpu_admin_state(self): + """Send set_admit_state trigger once to modules those are UP.""" + lock = threading.Lock() + threads = [] + for module_index in range(0, self.module_updater.num_modules): + op = None + # Get midplane state of DPU + module_info_dict = self.module_updater._get_module_info(module_index) + module_name = platform_chassis.get_module(module_index).get_name() + state_db = daemon_base.db_connect("STATE_DB") + midplane_table = swsscommon.Table(state_db, CHASSIS_MIDPLANE_INFO_TABLE) + fvs = midplane_table.get(module_name) + midplane_state = 'False' + if isinstance(fvs, list) and fvs[0] is True: + fvs = dict(fvs[-1]) + midplane_state = fvs[CHASSIS_MIDPLANE_INFO_ACCESS_FIELD] + + try: + if module_info_dict is not None: + # Get admin state of DPU + key = module_info_dict[CHASSIS_MODULE_INFO_NAME_FIELD] + admin_state = self.module_updater.get_module_admin_status(key) + if admin_state == 'down' and midplane_state == 'True': + # shutdown DPU + op = MODULE_ADMIN_DOWN + if admin_state != 'down' and midplane_state == 'False': + # startup DPU + op = MODULE_ADMIN_UP + + if op is not None: + # Spawn a thread to handle the DPU logic + def handle_dpu_logic(): + try: + with lock: + module = self.module_updater.chassis.get_module(module_index) + if module: + module.set_admin_state(op) + else: + self.log_warning(f"Module {module_index} not found") + except Exception as e: + self.log_error(f"Error in setting admin state for module {module_index}: {str(e)}", exc_info=True) + + # Create and start a thread for the DPU logic + thread = threading.Thread(target=handle_dpu_logic) + thread.daemon = True # Set as a daemon thread + thread.start() + threads.append(thread) + + except Exception as e: + self.log_error(f"Error in run: {str(e)}", exc_info=True) + + # Wait for all threads to finish + for thread in threads: + thread.join() + # Run daemon def run(self): global platform_chassis @@ -934,6 +989,10 @@ class ChassisdDaemon(daemon_base.DaemonBase): # Start main loop self.log_info("Start daemon main loop") + # Call the function to handle DPU admin state for SmartSwitch + if self.smartswitch: + self.set_initial_dpu_admin_state() + while not self.stop.wait(CHASSIS_INFO_UPDATE_PERIOD_SECS): self.module_updater.module_db_update() self.module_updater.check_midplane_reachability() From 49868a57723415b0e2fc6b09e3ea87a0e9cb03ed Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 1 Oct 2024 12:59:13 -0700 Subject: [PATCH 059/100] Improving coverage --- sonic-chassisd/scripts/chassisd | 5 +---- sonic-chassisd/tests/test_chassisd.py | 13 +++++++++++++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index b88835151..30539f60b 100755 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -884,7 +884,7 @@ class ChassisdDaemon(daemon_base.DaemonBase): self.log_warning("Caught unhandled signal '{}' - ignoring...".format(SIGNALS_TO_NAMES_DICT[sig])) def set_initial_dpu_admin_state(self): - """Send set_admit_state trigger once to modules those are UP.""" + """Send admin_state trigger once to modules those are powered up""" lock = threading.Lock() threads = [] for module_index in range(0, self.module_updater.num_modules): @@ -975,15 +975,12 @@ class ChassisdDaemon(daemon_base.DaemonBase): # Start configuration manager task on supervisor module if self.smartswitch: - self.log_info("smartswitch loop") config_manager = SmartSwitchConfigManagerTask() config_manager.task_run() elif self.module_updater.supervisor_slot == self.module_updater.my_slot: - self.log_info("Non smartswitch loop") config_manager = ConfigManagerTask() config_manager.task_run() else: - self.log_info("None loop") config_manager = None # Start main loop diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index fb2d18b9c..3d798fcb1 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -950,6 +950,19 @@ def test_signal_handler(): def test_daemon_run_smartswitch(): # Test the chassisd run chassis = MockSmartSwitchChassis() + + #DPU0 + index = 0 + name = "DPU0" + desc = "DPU Module 0" + slot = 0 + sup_slot = 0 + serial = "DPU0-0000" + module_type = ModuleBase.MODULE_TYPE_DPU + module = MockModule(index, name, desc, module_type, slot, serial) + module.set_midplane_ip() + chassis.module_list.append(module) + daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) daemon_chassisd.stop = MagicMock() daemon_chassisd.stop.wait.return_value = True From b366b2d2a590435e896d59185c78116c9a7364a0 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 1 Oct 2024 13:56:25 -0700 Subject: [PATCH 060/100] Trying to improve coverage --- sonic-chassisd/tests/test_chassisd.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 3d798fcb1..e4499ffd1 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -961,8 +961,16 @@ def test_daemon_run_smartswitch(): module_type = ModuleBase.MODULE_TYPE_DPU module = MockModule(index, name, desc, module_type, slot, serial) module.set_midplane_ip() + # Set initial state + status = ModuleBase.MODULE_STATUS_PRESENT + module.set_oper_status(status) chassis.module_list.append(module) + # Supervisor ModuleUpdater + module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis) + module_updater.module_db_update() + sup_module_updater.modules_num_update() + daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) daemon_chassisd.stop = MagicMock() daemon_chassisd.stop.wait.return_value = True From 82066d12e5637c1d8e0c652d8dfb399422ae68c4 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 1 Oct 2024 14:37:59 -0700 Subject: [PATCH 061/100] Fixed a typo --- sonic-chassisd/tests/test_chassisd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index e4499ffd1..713c3a1d3 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -969,7 +969,7 @@ def test_daemon_run_smartswitch(): # Supervisor ModuleUpdater module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis) module_updater.module_db_update() - sup_module_updater.modules_num_update() + module_updater.modules_num_update() daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) daemon_chassisd.stop = MagicMock() From 43f366143aef6381460ae08ea5914305fafb3c7f Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 1 Oct 2024 15:25:01 -0700 Subject: [PATCH 062/100] Working on coverage --- sonic-chassisd/tests/test_chassisd.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 713c3a1d3..29b48efbe 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -978,8 +978,9 @@ def test_daemon_run_smartswitch(): import sonic_platform.platform with patch.object(sonic_platform.platform.Chassis, 'is_smartswitch') as mock: - mock.return_value = True - daemon_chassisd.run() + mock.return_value = True + with patch(module_updater.num_modules, 1): + daemon_chassisd.run() def test_daemon_run_supervisor_invalid_slot(): chassis = MockChassis() From f62d54cb245a3883a38d2756dfbdbbc24d1b2c5d Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 1 Oct 2024 15:54:35 -0700 Subject: [PATCH 063/100] Fixed a typo --- sonic-chassisd/tests/test_chassisd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 29b48efbe..c54416b16 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -979,7 +979,7 @@ def test_daemon_run_smartswitch(): import sonic_platform.platform with patch.object(sonic_platform.platform.Chassis, 'is_smartswitch') as mock: mock.return_value = True - with patch(module_updater.num_modules, 1): + with patch('module_updater.num_modules', 1): daemon_chassisd.run() def test_daemon_run_supervisor_invalid_slot(): From 001a985d25a9c2421cd9c66f04f56a910365ea50 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 1 Oct 2024 16:09:02 -0700 Subject: [PATCH 064/100] Fixing sytax issues --- sonic-chassisd/tests/test_chassisd.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index c54416b16..2c87235f8 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -951,7 +951,7 @@ def test_daemon_run_smartswitch(): # Test the chassisd run chassis = MockSmartSwitchChassis() - #DPU0 + # DPU0 index = 0 name = "DPU0" desc = "DPU Module 0" @@ -977,9 +977,10 @@ def test_daemon_run_smartswitch(): daemon_chassisd.smartswitch = True import sonic_platform.platform - with patch.object(sonic_platform.platform.Chassis, 'is_smartswitch') as mock: - mock.return_value = True - with patch('module_updater.num_modules', 1): + with patch.object(sonic_platform.platform.Chassis, 'is_smartswitch') as mock_is_smartswitch: + mock_is_smartswitch.return_value = True + + with patch.object(module_updater, 'num_modules', 1): daemon_chassisd.run() def test_daemon_run_supervisor_invalid_slot(): From fa7fa71fdc444ea5ba77ec193a0b31da34dd5d4d Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 1 Oct 2024 16:40:59 -0700 Subject: [PATCH 065/100] Adding a test to improve coverage --- sonic-chassisd/tests/test_chassisd.py | 35 +++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 2c87235f8..1e27e5c6d 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -983,6 +983,41 @@ def test_daemon_run_smartswitch(): with patch.object(module_updater, 'num_modules', 1): daemon_chassisd.run() +def test_daemon_dpu_init(): + # Test the chassisd run + chassis = MockSmartSwitchChassis() + + # DPU0 + index = 0 + name = "DPU0" + desc = "DPU Module 0" + slot = 0 + sup_slot = 0 + serial = "DPU0-0000" + module_type = ModuleBase.MODULE_TYPE_DPU + module = MockModule(index, name, desc, module_type, slot, serial) + module.set_midplane_ip() + # Set initial state + status = ModuleBase.MODULE_STATUS_PRESENT + module.set_oper_status(status) + chassis.module_list.append(module) + + # Supervisor ModuleUpdater + module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis) + module_updater.module_db_update() + module_updater.modules_num_update() + + daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) + daemon_chassisd.stop = MagicMock() + daemon_chassisd.stop.wait.return_value = True + daemon_chassisd.smartswitch = True + + # Set num_modules to 1 directly + daemon_chassisd.module_updater.num_modules = 1 + + # Call set_initial_dpu_admin_state with force=True to ensure it's covered + daemon_chassisd.set_initial_dpu_admin_state(force=True) + def test_daemon_run_supervisor_invalid_slot(): chassis = MockChassis() #Supervisor From c92d23467a4180aba00a9cb8b2ac4354ae7605a1 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 1 Oct 2024 17:24:37 -0700 Subject: [PATCH 066/100] Assigned localy module_updater to daemon.module_updater --- sonic-chassisd/tests/test_chassisd.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 1e27e5c6d..355e4423a 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -1008,6 +1008,7 @@ def test_daemon_dpu_init(): module_updater.modules_num_update() daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) + daemon_chassisd.module_updater = module_updater daemon_chassisd.stop = MagicMock() daemon_chassisd.stop.wait.return_value = True daemon_chassisd.smartswitch = True From 9554bb3baf913047c89389d44daf4af1e7b00b26 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 1 Oct 2024 17:39:20 -0700 Subject: [PATCH 067/100] Fixed a syntax error --- sonic-chassisd/tests/test_chassisd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 355e4423a..384b719f7 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -1017,7 +1017,7 @@ def test_daemon_dpu_init(): daemon_chassisd.module_updater.num_modules = 1 # Call set_initial_dpu_admin_state with force=True to ensure it's covered - daemon_chassisd.set_initial_dpu_admin_state(force=True) + daemon_chassisd.set_initial_dpu_admin_state() def test_daemon_run_supervisor_invalid_slot(): chassis = MockChassis() From 6ef579e84ce398fa9a9eabd322ef3c404684bb4a Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 1 Oct 2024 18:44:03 -0700 Subject: [PATCH 068/100] Resolving syntax errors --- sonic-chassisd/tests/test_chassisd.py | 50 ++++++++++++++++----------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 384b719f7..b52213c21 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -984,41 +984,49 @@ def test_daemon_run_smartswitch(): daemon_chassisd.run() def test_daemon_dpu_init(): - # Test the chassisd run + # Create a mock SmartSwitchChassis chassis = MockSmartSwitchChassis() - # DPU0 - index = 0 - name = "DPU0" - desc = "DPU Module 0" - slot = 0 - sup_slot = 0 - serial = "DPU0-0000" - module_type = ModuleBase.MODULE_TYPE_DPU - module = MockModule(index, name, desc, module_type, slot, serial) - module.set_midplane_ip() - # Set initial state - status = ModuleBase.MODULE_STATUS_PRESENT - module.set_oper_status(status) - chassis.module_list.append(module) + # Mock the module that will be returned by get_module + mock_module = MagicMock() + mock_module.get_name.return_value = "DPU0" + mock_module.get_status.return_value = ModuleBase.MODULE_STATUS_PRESENT + mock_module.get_oper_status.return_value = ModuleBase.MODULE_STATUS_PRESENT + mock_module.get_admin_state.return_value = ModuleBase.MODULE_ADMIN_STATE_ENABLED - # Supervisor ModuleUpdater + # Ensure platform_chassis.get_module() returns the mock_module + chassis.get_module = MagicMock(return_value=mock_module) + + # Mock the SmartSwitchModuleUpdater and relevant methods module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis) - module_updater.module_db_update() - module_updater.modules_num_update() + module_updater.module_db_update = MagicMock() + module_updater.modules_num_update = MagicMock() + # Initialize ChassisdDaemon daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) - daemon_chassisd.module_updater = module_updater daemon_chassisd.stop = MagicMock() daemon_chassisd.stop.wait.return_value = True daemon_chassisd.smartswitch = True - # Set num_modules to 1 directly + # Mock module_updater within daemon_chassisd + daemon_chassisd.module_updater = module_updater daemon_chassisd.module_updater.num_modules = 1 - # Call set_initial_dpu_admin_state with force=True to ensure it's covered + # Mock platform_chassis and other attributes accessed in set_initial_dpu_admin_state() + platform_chassis_mock = MagicMock() + daemon_chassisd.platform_chassis = platform_chassis_mock + platform_chassis_mock.get_module.return_value = mock_module + + # Call set_initial_dpu_admin_state without force parameter daemon_chassisd.set_initial_dpu_admin_state() + # Assertions to validate behavior + platform_chassis_mock.get_module.assert_called_once_with(0) # Ensure get_module was called with index 0 + mock_module.get_name.assert_called_once() # Ensure get_name was called on the module + mock_module.get_status.assert_called_once() # Ensure get_status was called + mock_module.get_oper_status.assert_called_once() # Ensure get_oper_status was called + mock_module.get_admin_state.assert_called_once() # Ensure get_admin_state was called + def test_daemon_run_supervisor_invalid_slot(): chassis = MockChassis() #Supervisor From 482d9c8abda4dca2f89853c090a7914c89623cf0 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 1 Oct 2024 19:20:09 -0700 Subject: [PATCH 069/100] Fixing test issues --- sonic-chassisd/tests/test_chassisd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index b52213c21..fc80e93f2 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -992,7 +992,7 @@ def test_daemon_dpu_init(): mock_module.get_name.return_value = "DPU0" mock_module.get_status.return_value = ModuleBase.MODULE_STATUS_PRESENT mock_module.get_oper_status.return_value = ModuleBase.MODULE_STATUS_PRESENT - mock_module.get_admin_state.return_value = ModuleBase.MODULE_ADMIN_STATE_ENABLED + mock_module.get_admin_state.return_value = 1 # Ensure platform_chassis.get_module() returns the mock_module chassis.get_module = MagicMock(return_value=mock_module) From 84c6812b13e5f4b40fb978dd52108e29765083ce Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 1 Oct 2024 19:54:15 -0700 Subject: [PATCH 070/100] Fixing test failure --- sonic-chassisd/tests/test_chassisd.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index fc80e93f2..fc237e340 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -1008,20 +1008,20 @@ def test_daemon_dpu_init(): daemon_chassisd.stop.wait.return_value = True daemon_chassisd.smartswitch = True + # Mock platform_chassis and inject it into daemon_chassisd + platform_chassis_mock = MagicMock() + daemon_chassisd.platform_chassis = platform_chassis_mock + platform_chassis_mock.get_module.return_value = mock_module # Make sure get_module returns the mock module + # Mock module_updater within daemon_chassisd daemon_chassisd.module_updater = module_updater daemon_chassisd.module_updater.num_modules = 1 - # Mock platform_chassis and other attributes accessed in set_initial_dpu_admin_state() - platform_chassis_mock = MagicMock() - daemon_chassisd.platform_chassis = platform_chassis_mock - platform_chassis_mock.get_module.return_value = mock_module - # Call set_initial_dpu_admin_state without force parameter daemon_chassisd.set_initial_dpu_admin_state() - # Assertions to validate behavior - platform_chassis_mock.get_module.assert_called_once_with(0) # Ensure get_module was called with index 0 + # Assertions to validate the behavior + platform_chassis_mock.get_module.assert_called_once_with(0) mock_module.get_name.assert_called_once() # Ensure get_name was called on the module mock_module.get_status.assert_called_once() # Ensure get_status was called mock_module.get_oper_status.assert_called_once() # Ensure get_oper_status was called From b31be6f755e037d91a98f247201bbb03271a5321 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 1 Oct 2024 20:05:23 -0700 Subject: [PATCH 071/100] Debugging --- sonic-chassisd/tests/test_chassisd.py | 34 ++++++++++++--------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index fc237e340..0ba72910e 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -987,16 +987,6 @@ def test_daemon_dpu_init(): # Create a mock SmartSwitchChassis chassis = MockSmartSwitchChassis() - # Mock the module that will be returned by get_module - mock_module = MagicMock() - mock_module.get_name.return_value = "DPU0" - mock_module.get_status.return_value = ModuleBase.MODULE_STATUS_PRESENT - mock_module.get_oper_status.return_value = ModuleBase.MODULE_STATUS_PRESENT - mock_module.get_admin_state.return_value = 1 - - # Ensure platform_chassis.get_module() returns the mock_module - chassis.get_module = MagicMock(return_value=mock_module) - # Mock the SmartSwitchModuleUpdater and relevant methods module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis) module_updater.module_db_update = MagicMock() @@ -1011,21 +1001,27 @@ def test_daemon_dpu_init(): # Mock platform_chassis and inject it into daemon_chassisd platform_chassis_mock = MagicMock() daemon_chassisd.platform_chassis = platform_chassis_mock - platform_chassis_mock.get_module.return_value = mock_module # Make sure get_module returns the mock module + + # Mock platform_chassis.get_module to return a mock object + platform_chassis_mock.get_module.return_value = MagicMock() + + # Mock module_name directly to "DPU0" to bypass the call to get_name() + module_name = "DPU0" # Mock module_updater within daemon_chassisd daemon_chassisd.module_updater = module_updater daemon_chassisd.module_updater.num_modules = 1 - # Call set_initial_dpu_admin_state without force parameter - daemon_chassisd.set_initial_dpu_admin_state() + # Set up a patch to replace the call to get_name() with the mocked module_name + with patch('chassisd.ChassisdDaemon.platform_chassis.get_module') as mock_get_module: + mock_get_module.return_value.get_name.return_value = module_name + + # Call set_initial_dpu_admin_state and check the execution + daemon_chassisd.set_initial_dpu_admin_state() - # Assertions to validate the behavior - platform_chassis_mock.get_module.assert_called_once_with(0) - mock_module.get_name.assert_called_once() # Ensure get_name was called on the module - mock_module.get_status.assert_called_once() # Ensure get_status was called - mock_module.get_oper_status.assert_called_once() # Ensure get_oper_status was called - mock_module.get_admin_state.assert_called_once() # Ensure get_admin_state was called + # Assert that get_module(0).get_name() was called once + mock_get_module.assert_called_once_with(0) + mock_get_module.return_value.get_name.assert_called_once() def test_daemon_run_supervisor_invalid_slot(): chassis = MockChassis() From dd9170271344b3d8b92916f931e6d0511d5e814e Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 1 Oct 2024 20:32:06 -0700 Subject: [PATCH 072/100] Debugging --- sonic-chassisd/tests/test_chassisd.py | 74 ++++++++++++++++++--------- 1 file changed, 49 insertions(+), 25 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 0ba72910e..7b3a99fe4 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -983,34 +983,58 @@ def test_daemon_run_smartswitch(): with patch.object(module_updater, 'num_modules', 1): daemon_chassisd.run() -def test_daemon_dpu_init(): - # Create a mock SmartSwitchChassis - chassis = MockSmartSwitchChassis() - - # Mock the SmartSwitchModuleUpdater and relevant methods - module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis) - module_updater.module_db_update = MagicMock() - module_updater.modules_num_update = MagicMock() - - # Initialize ChassisdDaemon - daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) - daemon_chassisd.stop = MagicMock() - daemon_chassisd.stop.wait.return_value = True - daemon_chassisd.smartswitch = True - - # Mock platform_chassis and inject it into daemon_chassisd - platform_chassis_mock = MagicMock() - daemon_chassisd.platform_chassis = platform_chassis_mock + @patch('sonic_platform.platform.Platform') # Mock the Platform class + @patch('daemon_base.db_connect') # Mock db_connect to control state_db behavior + def test_set_initial_dpu_admin_state(self, mock_db_connect, mock_platform): + # Create mock objects + mock_chassis = MagicMock() + mock_module_updater = MagicMock() + + # Set up the platform chassis mock + mock_platform_instance = mock_platform.return_value + mock_platform_instance.get_chassis.return_value = mock_chassis + + # Setup mock module updater + daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) + daemon_chassisd.platform_chassis = mock_platform_instance + daemon_chassisd.module_updater = mock_module_updater + + # Set number of modules to 1 + mock_module_updater.num_modules = 1 + + # Mock get_module_info to return a valid module info + mock_module_updater._get_module_info.return_value = {CHASSIS_MODULE_INFO_NAME_FIELD: "DPU0"} + + # Mock the module + mock_module = MagicMock() + mock_module.get_name.return_value = "DPU0" + mock_chassis.get_module.return_value = mock_module + + # Mock state_db and its return values + mock_state_db = MagicMock() + fvs_mock = [True, {CHASSIS_MIDPLANE_INFO_ACCESS_FIELD: 'True'}] + mock_state_db.get.return_value = fvs_mock + + # Mock the return value of db_connect + mock_db_connect.return_value = mock_state_db + + # Mock the admin status to simulate various states + mock_module_updater.get_module_admin_status.return_value = 'down' + + # Call the method we're testing + daemon_chassisd.set_initial_dpu_admin_state() - # Mock platform_chassis.get_module to return a mock object - platform_chassis_mock.get_module.return_value = MagicMock() + # Assertions to verify the logic was executed correctly + # Verify if module.get_name() was called correctly + mock_chassis.get_module.assert_called_once_with(0) + mock_module.get_name.assert_called_once() - # Mock module_name directly to "DPU0" to bypass the call to get_name() - module_name = "DPU0" + # Verify if the admin state was set on the module + mock_module.set_admin_state.assert_called_once_with(MODULE_ADMIN_UP) - # Mock module_updater within daemon_chassisd - daemon_chassisd.module_updater = module_updater - daemon_chassisd.module_updater.num_modules = 1 + # Ensure no exceptions were raised + self.assertFalse(mock_module_updater._get_module_info.called) # Should have returned valid info + self.assertTrue(mock_db_connect.called) # Set up a patch to replace the call to get_name() with the mocked module_name with patch('chassisd.ChassisdDaemon.platform_chassis.get_module') as mock_get_module: From c7d02f6824effd76e42c2476ddf91a598e5c177d Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 1 Oct 2024 20:37:22 -0700 Subject: [PATCH 073/100] Debugging --- sonic-chassisd/tests/test_chassisd.py | 115 ++++++++++++-------------- 1 file changed, 52 insertions(+), 63 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 7b3a99fe4..148949d48 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -983,69 +983,58 @@ def test_daemon_run_smartswitch(): with patch.object(module_updater, 'num_modules', 1): daemon_chassisd.run() - @patch('sonic_platform.platform.Platform') # Mock the Platform class - @patch('daemon_base.db_connect') # Mock db_connect to control state_db behavior - def test_set_initial_dpu_admin_state(self, mock_db_connect, mock_platform): - # Create mock objects - mock_chassis = MagicMock() - mock_module_updater = MagicMock() - - # Set up the platform chassis mock - mock_platform_instance = mock_platform.return_value - mock_platform_instance.get_chassis.return_value = mock_chassis - - # Setup mock module updater - daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) - daemon_chassisd.platform_chassis = mock_platform_instance - daemon_chassisd.module_updater = mock_module_updater - - # Set number of modules to 1 - mock_module_updater.num_modules = 1 - - # Mock get_module_info to return a valid module info - mock_module_updater._get_module_info.return_value = {CHASSIS_MODULE_INFO_NAME_FIELD: "DPU0"} - - # Mock the module - mock_module = MagicMock() - mock_module.get_name.return_value = "DPU0" - mock_chassis.get_module.return_value = mock_module - - # Mock state_db and its return values - mock_state_db = MagicMock() - fvs_mock = [True, {CHASSIS_MIDPLANE_INFO_ACCESS_FIELD: 'True'}] - mock_state_db.get.return_value = fvs_mock - - # Mock the return value of db_connect - mock_db_connect.return_value = mock_state_db - - # Mock the admin status to simulate various states - mock_module_updater.get_module_admin_status.return_value = 'down' - - # Call the method we're testing - daemon_chassisd.set_initial_dpu_admin_state() - - # Assertions to verify the logic was executed correctly - # Verify if module.get_name() was called correctly - mock_chassis.get_module.assert_called_once_with(0) - mock_module.get_name.assert_called_once() - - # Verify if the admin state was set on the module - mock_module.set_admin_state.assert_called_once_with(MODULE_ADMIN_UP) - - # Ensure no exceptions were raised - self.assertFalse(mock_module_updater._get_module_info.called) # Should have returned valid info - self.assertTrue(mock_db_connect.called) - - # Set up a patch to replace the call to get_name() with the mocked module_name - with patch('chassisd.ChassisdDaemon.platform_chassis.get_module') as mock_get_module: - mock_get_module.return_value.get_name.return_value = module_name - - # Call set_initial_dpu_admin_state and check the execution - daemon_chassisd.set_initial_dpu_admin_state() - - # Assert that get_module(0).get_name() was called once - mock_get_module.assert_called_once_with(0) - mock_get_module.return_value.get_name.assert_called_once() +@patch('sonic_platform.platform.Platform') # Mock the Platform class +@patch('daemon_base.db_connect') # Mock db_connect to control state_db behavior +def test_set_initial_dpu_admin_state(self, mock_db_connect, mock_platform): + # Create mock objects + mock_chassis = MagicMock() + mock_module_updater = MagicMock() + + # Set up the platform chassis mock + mock_plat form_instance = mock_platform.return_value + mock_platform_instance.get_chassis.return_value = mock_chassis + + # Setup mock module updater + daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) + daemon_chassisd.platform_chassis = mock_platform_instance + daemon_chassisd.module_updater = mock_module_updater + + # Set number of modules to 1 + mock_module_updater.num_modules = 1 + + # Mock get_module_info to return a valid module info + mock_module_updater._get_module_info.return_value = {CHASSIS_MODULE_INFO_NAME_FIELD: "DPU0"} + + # Mock the module + mock_module = MagicMock() + mock_module.get_name.return_value = "DPU0" + mock_chassis.get_module.return_value = mock_module + + # Mock state_db and its return values + mock_state_db = MagicMock() + fvs_mock = [True, {CHASSIS_MIDPLANE_INFO_ACCESS_FIELD: 'True'}] + mock_state_db.get.return_value = fvs_mock + + # Mock the return value of db_connect + mock_db_connect.return_value = mock_state_db + + # Mock the admin status to simulate various states + mock_module_updater.get_module_admin_status.return_value = 'down' + + # Call the method we're testing + daemon_chassisd.set_initial_dpu_admin_state() + + # Assertions to verify the logic was executed correctly + # Verify if module.get_name() was called correctly + mock_chassis.get_module.assert_called_once_with(0) + mock_module.get_name.assert_called_once() + + # Verify if the admin state was set on the module + mock_module.set_admin_state.assert_called_once_with(MODULE_ADMIN_UP) + + # Ensure no exceptions were raised + self.assertFalse(mock_module_updater._get_module_info.called) # Should have returned valid info + self.assertTrue(mock_db_connect.called) def test_daemon_run_supervisor_invalid_slot(): chassis = MockChassis() From 52a5007d50b9b612d9ef039e496484147ee0ae09 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 1 Oct 2024 21:11:32 -0700 Subject: [PATCH 074/100] Debugging --- sonic-chassisd/tests/test_chassisd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 148949d48..93959d34d 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -991,7 +991,7 @@ def test_set_initial_dpu_admin_state(self, mock_db_connect, mock_platform): mock_module_updater = MagicMock() # Set up the platform chassis mock - mock_plat form_instance = mock_platform.return_value + mock_platform_instance = mock_platform.return_value mock_platform_instance.get_chassis.return_value = mock_chassis # Setup mock module updater From 598220b8474110d92d2f8d3e006596e176dd4ff4 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 1 Oct 2024 21:31:30 -0700 Subject: [PATCH 075/100] trying 2 tests --- sonic-chassisd/tests/test_chassisd.py | 109 +++++++++++++++----------- 1 file changed, 64 insertions(+), 45 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 93959d34d..d35894c3d 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -983,58 +983,77 @@ def test_daemon_run_smartswitch(): with patch.object(module_updater, 'num_modules', 1): daemon_chassisd.run() -@patch('sonic_platform.platform.Platform') # Mock the Platform class @patch('daemon_base.db_connect') # Mock db_connect to control state_db behavior -def test_set_initial_dpu_admin_state(self, mock_db_connect, mock_platform): - # Create mock objects - mock_chassis = MagicMock() - mock_module_updater = MagicMock() - - # Set up the platform chassis mock - mock_platform_instance = mock_platform.return_value - mock_platform_instance.get_chassis.return_value = mock_chassis +def test_set_initial_dpu_admin_state(self, mock_db_connect): + import sonic_platform.platform # Import the module to patch the Platform class - # Setup mock module updater - daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) - daemon_chassisd.platform_chassis = mock_platform_instance - daemon_chassisd.module_updater = mock_module_updater - - # Set number of modules to 1 - mock_module_updater.num_modules = 1 - - # Mock get_module_info to return a valid module info - mock_module_updater._get_module_info.return_value = {CHASSIS_MODULE_INFO_NAME_FIELD: "DPU0"} - - # Mock the module - mock_module = MagicMock() - mock_module.get_name.return_value = "DPU0" - mock_chassis.get_module.return_value = mock_module - - # Mock state_db and its return values - mock_state_db = MagicMock() - fvs_mock = [True, {CHASSIS_MIDPLANE_INFO_ACCESS_FIELD: 'True'}] - mock_state_db.get.return_value = fvs_mock - - # Mock the return value of db_connect - mock_db_connect.return_value = mock_state_db + # Arrange: Set up mocks + mock_chassis_instance = MagicMock() - # Mock the admin status to simulate various states - mock_module_updater.get_module_admin_status.return_value = 'down' + with patch('sonic_platform.platform.Platform') as mock_platform: # Mock the Platform class + mock_platform.return_value.get_chassis.return_value = mock_chassis_instance + + mock_db = MagicMock() + mock_db_connect.return_value = mock_db + + # Mock the chain in one line + mock_chassis_instance.get_module.return_value.get_name.return_value = "DPU0" + + # Mocking the module updater + mock_module_updater = MagicMock() + mock_module_updater.num_modules = 1 + mock_module_updater._get_module_info.return_value = { + 'name': 'DPU0' + } + mock_module_updater.get_module_admin_status.return_value = 'down' + + # Create an instance of ChassisdDaemon with the mocked platform + daemon_chassisd = ChassisdDaemon('SYSLOG_IDENTIFIER') + daemon_chassisd.module_updater = mock_module_updater + daemon_chassisd.platform_chassis = mock_chassis_instance + + # Act: Call the method to test + daemon_chassisd.set_initial_dpu_admin_state() + + # Assert: Check that the correct calls were made + mock_module_updater._get_module_info.assert_called_once_with(0) + mock_module_updater.chassis.get_module(0).set_admin_state.assert_called_once_with(ModuleBase.MODULE_ADMIN_DOWN) + +def test_set_initial_dpu_admin_state_a(): + # Test the chassisd run + chassis = MockSmartSwitchChassis() + + # DPU0 + index = 0 + name = "DPU0" + desc = "DPU Module 0" + slot = 0 + sup_slot = 0 + serial = "DPU0-0000" + module_type = ModuleBase.MODULE_TYPE_DPU + module = MockModule(index, name, desc, module_type, slot, serial) + module.set_midplane_ip() + # Set initial state + status = ModuleBase.MODULE_STATUS_PRESENT + module.set_oper_status(status) + chassis.module_list.append(module) - # Call the method we're testing - daemon_chassisd.set_initial_dpu_admin_state() + # Supervisor ModuleUpdater + module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis) + module_updater.module_db_update() + module_updater.modules_num_update() - # Assertions to verify the logic was executed correctly - # Verify if module.get_name() was called correctly - mock_chassis.get_module.assert_called_once_with(0) - mock_module.get_name.assert_called_once() + daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) + daemon_chassisd.stop = MagicMock() + daemon_chassisd.stop.wait.return_value = True + daemon_chassisd.smartswitch = True - # Verify if the admin state was set on the module - mock_module.set_admin_state.assert_called_once_with(MODULE_ADMIN_UP) + import sonic_platform.platform + with patch.object(sonic_platform.platform.Chassis, 'is_smartswitch') as mock_is_smartswitch: + mock_is_smartswitch.return_value = True - # Ensure no exceptions were raised - self.assertFalse(mock_module_updater._get_module_info.called) # Should have returned valid info - self.assertTrue(mock_db_connect.called) + with patch.object(module_updater, 'num_modules', 1): + daemon_chassisd.set_initial_dpu_admin_state() def test_daemon_run_supervisor_invalid_slot(): chassis = MockChassis() From 6e90f3bbaf47c105c674b895c70bf1077c037fb7 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 1 Oct 2024 21:59:39 -0700 Subject: [PATCH 076/100] Debugging --- sonic-chassisd/tests/test_chassisd.py | 57 +++++++++++++-------------- 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index d35894c3d..a8fc141a4 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -984,40 +984,38 @@ def test_daemon_run_smartswitch(): daemon_chassisd.run() @patch('daemon_base.db_connect') # Mock db_connect to control state_db behavior -def test_set_initial_dpu_admin_state(self, mock_db_connect): - import sonic_platform.platform # Import the module to patch the Platform class - +@patch('sonic_platform.platform.Platform') # Mock the Platform class +def test_set_initial_dpu_admin_state(self, mock_platform, mock_db_connect): # Arrange: Set up mocks mock_chassis_instance = MagicMock() - - with patch('sonic_platform.platform.Platform') as mock_platform: # Mock the Platform class - mock_platform.return_value.get_chassis.return_value = mock_chassis_instance - - mock_db = MagicMock() - mock_db_connect.return_value = mock_db - - # Mock the chain in one line - mock_chassis_instance.get_module.return_value.get_name.return_value = "DPU0" + mock_platform.return_value.get_chassis.return_value = mock_chassis_instance - # Mocking the module updater - mock_module_updater = MagicMock() - mock_module_updater.num_modules = 1 - mock_module_updater._get_module_info.return_value = { - 'name': 'DPU0' - } - mock_module_updater.get_module_admin_status.return_value = 'down' + # Mock the database connection + mock_db = MagicMock() + mock_db_connect.return_value = mock_db - # Create an instance of ChassisdDaemon with the mocked platform - daemon_chassisd = ChassisdDaemon('SYSLOG_IDENTIFIER') - daemon_chassisd.module_updater = mock_module_updater - daemon_chassisd.platform_chassis = mock_chassis_instance + # Mock the chain in one line + mock_chassis_instance.get_module.return_value.get_name.return_value = "DPU0" - # Act: Call the method to test - daemon_chassisd.set_initial_dpu_admin_state() + # Mocking the module updater + mock_module_updater = MagicMock() + mock_module_updater.num_modules = 1 + mock_module_updater._get_module_info.return_value = { + 'name': 'DPU0' + } + mock_module_updater.get_module_admin_status.return_value = 'down' - # Assert: Check that the correct calls were made - mock_module_updater._get_module_info.assert_called_once_with(0) - mock_module_updater.chassis.get_module(0).set_admin_state.assert_called_once_with(ModuleBase.MODULE_ADMIN_DOWN) + # Create an instance of ChassisdDaemon with the mocked platform + daemon_chassisd = ChassisdDaemon('SYSLOG_IDENTIFIER') + daemon_chassisd.module_updater = mock_module_updater + daemon_chassisd.platform_chassis = mock_chassis_instance + + # Act: Call the method to test + daemon_chassisd.set_initial_dpu_admin_state() + + # Assert: Check that the correct calls were made + mock_module_updater._get_module_info.assert_called_once_with(0) + mock_module_updater.chassis.get_module(0).set_admin_state.assert_called_once_with(ModuleBase.MODULE_ADMIN_DOWN) def test_set_initial_dpu_admin_state_a(): # Test the chassisd run @@ -1044,6 +1042,7 @@ def test_set_initial_dpu_admin_state_a(): module_updater.modules_num_update() daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) + daemon_chassisd.module_updater = module_updater daemon_chassisd.stop = MagicMock() daemon_chassisd.stop.wait.return_value = True daemon_chassisd.smartswitch = True @@ -1052,7 +1051,7 @@ def test_set_initial_dpu_admin_state_a(): with patch.object(sonic_platform.platform.Chassis, 'is_smartswitch') as mock_is_smartswitch: mock_is_smartswitch.return_value = True - with patch.object(module_updater, 'num_modules', 1): + with patch.object(daemon_chassisd.module_updater, 'num_modules', 1): daemon_chassisd.set_initial_dpu_admin_state() def test_daemon_run_supervisor_invalid_slot(): From 2fbcd3737e306ea052aebe66b05811a75cd53091 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Wed, 2 Oct 2024 06:53:03 -0700 Subject: [PATCH 077/100] Debugging --- sonic-chassisd/tests/test_chassisd.py | 42 ++++----------------------- 1 file changed, 6 insertions(+), 36 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index a8fc141a4..f75518c0a 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -983,41 +983,7 @@ def test_daemon_run_smartswitch(): with patch.object(module_updater, 'num_modules', 1): daemon_chassisd.run() -@patch('daemon_base.db_connect') # Mock db_connect to control state_db behavior -@patch('sonic_platform.platform.Platform') # Mock the Platform class -def test_set_initial_dpu_admin_state(self, mock_platform, mock_db_connect): - # Arrange: Set up mocks - mock_chassis_instance = MagicMock() - mock_platform.return_value.get_chassis.return_value = mock_chassis_instance - - # Mock the database connection - mock_db = MagicMock() - mock_db_connect.return_value = mock_db - - # Mock the chain in one line - mock_chassis_instance.get_module.return_value.get_name.return_value = "DPU0" - - # Mocking the module updater - mock_module_updater = MagicMock() - mock_module_updater.num_modules = 1 - mock_module_updater._get_module_info.return_value = { - 'name': 'DPU0' - } - mock_module_updater.get_module_admin_status.return_value = 'down' - - # Create an instance of ChassisdDaemon with the mocked platform - daemon_chassisd = ChassisdDaemon('SYSLOG_IDENTIFIER') - daemon_chassisd.module_updater = mock_module_updater - daemon_chassisd.platform_chassis = mock_chassis_instance - - # Act: Call the method to test - daemon_chassisd.set_initial_dpu_admin_state() - - # Assert: Check that the correct calls were made - mock_module_updater._get_module_info.assert_called_once_with(0) - mock_module_updater.chassis.get_module(0).set_admin_state.assert_called_once_with(ModuleBase.MODULE_ADMIN_DOWN) - -def test_set_initial_dpu_admin_state_a(): +def test_set_initial_dpu_admin_state(): # Test the chassisd run chassis = MockSmartSwitchChassis() @@ -1048,11 +1014,15 @@ def test_set_initial_dpu_admin_state_a(): daemon_chassisd.smartswitch = True import sonic_platform.platform + daemon_chassisd.platform_chassis = chassis + with patch.object(sonic_platform.platform.Chassis, 'is_smartswitch') as mock_is_smartswitch: mock_is_smartswitch.return_value = True with patch.object(daemon_chassisd.module_updater, 'num_modules', 1): - daemon_chassisd.set_initial_dpu_admin_state() + with patch('daemon_chassisd.platform_chassis.get_module') as mock_get_module: + mock_get_module.return_value.get_name.return_value = "DPU0" + daemon_chassisd.set_initial_dpu_admin_state() def test_daemon_run_supervisor_invalid_slot(): chassis = MockChassis() From 007d1a8af256b2e5ae66152385ca72948a776c49 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Wed, 2 Oct 2024 07:30:00 -0700 Subject: [PATCH 078/100] Trying to improve coverage --- sonic-chassisd/tests/test_chassisd.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index f75518c0a..c902c3d6e 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -1014,15 +1014,29 @@ def test_set_initial_dpu_admin_state(): daemon_chassisd.smartswitch = True import sonic_platform.platform - daemon_chassisd.platform_chassis = chassis + platform_chassis = chassis + + # Mock the module + mock_module = MagicMock() + mock_module.get_name.return_value = "DPU0" + mock_chassis.get_module.return_value = mock_module + + # Mock state_db + mock_state_db = MagicMock() + fvs_mock = [True, {CHASSIS_MIDPLANE_INFO_ACCESS_FIELD: 'True'}] + mock_stat_db.get.return_value = fvs_mock + + # Mock db_connect + mock_db_connect.return_value = mock_state_db + + # Mock admin_status + mock_module_updater.get_module_admin_status.return_value = 'down' with patch.object(sonic_platform.platform.Chassis, 'is_smartswitch') as mock_is_smartswitch: mock_is_smartswitch.return_value = True with patch.object(daemon_chassisd.module_updater, 'num_modules', 1): - with patch('daemon_chassisd.platform_chassis.get_module') as mock_get_module: - mock_get_module.return_value.get_name.return_value = "DPU0" - daemon_chassisd.set_initial_dpu_admin_state() + daemon_chassisd.set_initial_dpu_admin_state() def test_daemon_run_supervisor_invalid_slot(): chassis = MockChassis() From cb90f02ab47eaa80248cce2cad57f3dcf81ae316 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Wed, 2 Oct 2024 07:41:47 -0700 Subject: [PATCH 079/100] Debugging test --- sonic-chassisd/tests/test_chassisd.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index c902c3d6e..1d62c8fba 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -1016,6 +1016,10 @@ def test_set_initial_dpu_admin_state(): import sonic_platform.platform platform_chassis = chassis + # Mock objects + mock_chassis = MagicMock() + mock_module_updater = MagicMock() + # Mock the module mock_module = MagicMock() mock_module.get_name.return_value = "DPU0" From 9a749929332b8cf653b68d2b68484183bd6f900b Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Wed, 2 Oct 2024 07:54:02 -0700 Subject: [PATCH 080/100] Fixed a typo --- sonic-chassisd/tests/test_chassisd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 1d62c8fba..b79aace13 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -1028,7 +1028,7 @@ def test_set_initial_dpu_admin_state(): # Mock state_db mock_state_db = MagicMock() fvs_mock = [True, {CHASSIS_MIDPLANE_INFO_ACCESS_FIELD: 'True'}] - mock_stat_db.get.return_value = fvs_mock + mock_state_db.get.return_value = fvs_mock # Mock db_connect mock_db_connect.return_value = mock_state_db From 1342d19db93d79775f3395493d2ec929ef751027 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Wed, 2 Oct 2024 08:25:53 -0700 Subject: [PATCH 081/100] fixed a test failure --- sonic-chassisd/tests/test_chassisd.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index b79aace13..41b1dc2db 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -1031,6 +1031,7 @@ def test_set_initial_dpu_admin_state(): mock_state_db.get.return_value = fvs_mock # Mock db_connect + mock_db_connect = MagicMock() mock_db_connect.return_value = mock_state_db # Mock admin_status From 9eeda1d6ef85238e9582ab6c90390f2378edf7fd Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Wed, 2 Oct 2024 08:44:05 -0700 Subject: [PATCH 082/100] debugging test --- sonic-chassisd/tests/test_chassisd.py | 39 ++++++--------------------- 1 file changed, 8 insertions(+), 31 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 41b1dc2db..e36939d5e 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -984,10 +984,10 @@ def test_daemon_run_smartswitch(): daemon_chassisd.run() def test_set_initial_dpu_admin_state(): - # Test the chassisd run + + # Arrange: Create necessary mock objects and variables chassis = MockSmartSwitchChassis() - # DPU0 index = 0 name = "DPU0" desc = "DPU Module 0" @@ -997,12 +997,10 @@ def test_set_initial_dpu_admin_state(): module_type = ModuleBase.MODULE_TYPE_DPU module = MockModule(index, name, desc, module_type, slot, serial) module.set_midplane_ip() - # Set initial state status = ModuleBase.MODULE_STATUS_PRESENT module.set_oper_status(status) chassis.module_list.append(module) - # Supervisor ModuleUpdater module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis) module_updater.module_db_update() module_updater.modules_num_update() @@ -1014,34 +1012,13 @@ def test_set_initial_dpu_admin_state(): daemon_chassisd.smartswitch = True import sonic_platform.platform - platform_chassis = chassis - - # Mock objects - mock_chassis = MagicMock() - mock_module_updater = MagicMock() - - # Mock the module - mock_module = MagicMock() - mock_module.get_name.return_value = "DPU0" - mock_chassis.get_module.return_value = mock_module - - # Mock state_db - mock_state_db = MagicMock() - fvs_mock = [True, {CHASSIS_MIDPLANE_INFO_ACCESS_FIELD: 'True'}] - mock_state_db.get.return_value = fvs_mock - - # Mock db_connect - mock_db_connect = MagicMock() - mock_db_connect.return_value = mock_state_db - - # Mock admin_status - mock_module_updater.get_module_admin_status.return_value = 'down' - - with patch.object(sonic_platform.platform.Chassis, 'is_smartswitch') as mock_is_smartswitch: - mock_is_smartswitch.return_value = True - with patch.object(daemon_chassisd.module_updater, 'num_modules', 1): - daemon_chassisd.set_initial_dpu_admin_state() + # Act: Patch platform_chassis in the daemon and the get_module method + with patch.object(daemon_chassisd, 'platform_chassis', new=chassis): + with patch.object(chassis, 'get_module', return_value=module) as mock_get_module: + with patch.object(daemon_chassisd.module_updater, 'num_modules', 1): + # Act: Call the method under test + daemon_chassisd.set_initial_dpu_admin_state() def test_daemon_run_supervisor_invalid_slot(): chassis = MockChassis() From 9e25701ccf03d5a81d8733183bd29bddf30aa373 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Wed, 2 Oct 2024 09:45:49 -0700 Subject: [PATCH 083/100] mocking get_module --- sonic-chassisd/tests/test_chassisd.py | 64 ++++++++++++++++++++++----- 1 file changed, 52 insertions(+), 12 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index e36939d5e..004087ab1 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -984,10 +984,10 @@ def test_daemon_run_smartswitch(): daemon_chassisd.run() def test_set_initial_dpu_admin_state(): - - # Arrange: Create necessary mock objects and variables + # Test the chassisd run chassis = MockSmartSwitchChassis() - + + # DPU0 details index = 0 name = "DPU0" desc = "DPU Module 0" @@ -997,28 +997,68 @@ def test_set_initial_dpu_admin_state(): module_type = ModuleBase.MODULE_TYPE_DPU module = MockModule(index, name, desc, module_type, slot, serial) module.set_midplane_ip() + + # Set initial state for DPU0 status = ModuleBase.MODULE_STATUS_PRESENT module.set_oper_status(status) chassis.module_list.append(module) - + + # Supervisor ModuleUpdater module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis) module_updater.module_db_update() module_updater.modules_num_update() - + + # ChassisdDaemon setup daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER) daemon_chassisd.module_updater = module_updater daemon_chassisd.stop = MagicMock() daemon_chassisd.stop.wait.return_value = True daemon_chassisd.smartswitch = True - + + # Import platform and use chassis as platform_chassis import sonic_platform.platform + platform_chassis = chassis + + # Mock objects + mock_chassis = MagicMock() + mock_module_updater = MagicMock() + + # Mock the module (DPU0) + mock_module = MagicMock() + mock_module.get_name.return_value = "DPU0" + + # Mock chassis.get_module to return the mock_module for DPU0 + def mock_get_module(index): + if index == 0: # For DPU0 + return mock_module + return None # No other modules available in this test case + + # Apply the side effect for chassis.get_module + mock_chassis.get_module.side_effect = mock_get_module + + # Mock state_db + mock_state_db = MagicMock() + fvs_mock = [True, {CHASSIS_MIDPLANE_INFO_ACCESS_FIELD: 'True'}] + mock_state_db.get.return_value = fvs_mock + + # Mock db_connect + mock_db_connect = MagicMock() + mock_db_connect.return_value = mock_state_db + + # Mock admin_status + mock_module_updater.get_module_admin_status.return_value = 'down' + + # Patching platform's Chassis object to return the mocked module + with patch.object(sonic_platform.platform.Chassis, 'is_smartswitch') as mock_is_smartswitch, \ + patch.object(sonic_platform.platform.Chassis, 'get_module', side_effect=mock_get_module): + + # Simulate that the system is a SmartSwitch + mock_is_smartswitch.return_value = True - # Act: Patch platform_chassis in the daemon and the get_module method - with patch.object(daemon_chassisd, 'platform_chassis', new=chassis): - with patch.object(chassis, 'get_module', return_value=module) as mock_get_module: - with patch.object(daemon_chassisd.module_updater, 'num_modules', 1): - # Act: Call the method under test - daemon_chassisd.set_initial_dpu_admin_state() + # Patch num_modules for the updater + with patch.object(daemon_chassisd.module_updater, 'num_modules', 1): + # Now run the function that sets the initial admin state + daemon_chassisd.set_initial_dpu_admin_state() def test_daemon_run_supervisor_invalid_slot(): chassis = MockChassis() From 6e29e8805e3c697b23bd02f4a0cbdb9157b23dfe Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Wed, 2 Oct 2024 14:58:30 -0700 Subject: [PATCH 084/100] Removed CHASSIS_MODULE_INFO_ASICS for smartswitch --- sonic-chassisd/scripts/chassisd | 2 -- 1 file changed, 2 deletions(-) diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index 30539f60b..7f2578f90 100755 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -681,7 +681,6 @@ class SmartSwitchModuleUpdater(ModuleUpdater): (CHASSIS_MODULE_INFO_SLOT_FIELD, module_info_dict[CHASSIS_MODULE_INFO_SLOT_FIELD]), (CHASSIS_MODULE_INFO_OPERSTATUS_FIELD, module_info_dict[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD]), - (CHASSIS_MODULE_INFO_NUM_ASICS_FIELD, str(len(module_info_dict[CHASSIS_MODULE_INFO_ASICS]))), (CHASSIS_MODULE_INFO_SERIAL_FIELD, module_info_dict[CHASSIS_MODULE_INFO_SERIAL_FIELD])]) prev_status = self.get_module_current_status(key) self.module_table.set(key, fvs) @@ -704,7 +703,6 @@ class SmartSwitchModuleUpdater(ModuleUpdater): module_info_dict[CHASSIS_MODULE_INFO_DESC_FIELD] = str(desc) module_info_dict[CHASSIS_MODULE_INFO_SLOT_FIELD] = 'N/A' module_info_dict[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] = str(status) - module_info_dict[CHASSIS_MODULE_INFO_ASICS] = asics module_info_dict[CHASSIS_MODULE_INFO_SERIAL_FIELD] = str(serial) return module_info_dict From 2ae658dbc20fe7d8862fd0319e1cf78151aec105 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Fri, 11 Oct 2024 09:06:37 -0700 Subject: [PATCH 085/100] Added a docstring and updated a comment --- sonic-chassisd/scripts/chassisd | 34 ++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index 7f2578f90..ddb3be0aa 100755 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -194,6 +194,38 @@ class SmartSwitchModuleConfigUpdater(logger.Logger): """ def module_config_update(self, key, admin_state): + """ + Updates the administrative state of a module based on the provided key and state. + + This function first validates the module key to ensure it starts with the expected + prefix (ModuleBase.MODULE_TYPE_DPU) and retrieves the corresponding module index. + If the index is valid, and the desired admin state is either MODULE_ADMIN_DOWN or + MODULE_ADMIN_UP, a new thread is started to handle the state change by calling the + submit_callback function. + + A lock is used to ensure thread safety during the creation of the new thread. The + lock prevents race conditions by ensuring that only one thread can modify shared + resources, such as self.chassis, at a time. + + Args: + key (str): The unique identifier of the module. Must start with the prefix + defined by ModuleBase.MODULE_TYPE_DPU. + admin_state (str): The desired administrative state of the module. Can be either + MODULE_ADMIN_DOWN or MODULE_ADMIN_UP. + + Returns: + None + + Logs: + - Logs an error if the module name is incorrect or if the module index is invalid. + - Logs the state change information or a warning for an invalid admin_state. + + Thread Safety: + - A lock is acquired before creating a new thread to prevent concurrent access to + shared resources. If the submit_callback accesses shared data, consider adding + the lock inside that function as well to ensure complete thread safety. + """ + if not key.startswith(ModuleBase.MODULE_TYPE_DPU): self.log_error("Incorrect module-name {}. Should start with {}".format(key, ModuleBase.MODULE_TYPE_DPU)) @@ -971,7 +1003,7 @@ class ChassisdDaemon(daemon_base.DaemonBase): self.log_error("Chassisd not supported for this platform") sys.exit(CHASSIS_NOT_SUPPORTED) - # Start configuration manager task on supervisor module + # Start configuration manager task if self.smartswitch: config_manager = SmartSwitchConfigManagerTask() config_manager.task_run() From a55ecacc4a05afb9ee060783b98b7dbd2e39a3c0 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Fri, 18 Oct 2024 17:47:36 -0700 Subject: [PATCH 086/100] Fixed a thread issue and removed all locks --- sonic-chassisd/scripts/chassisd | 40 ++++++--------------------------- 1 file changed, 7 insertions(+), 33 deletions(-) diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index ddb3be0aa..02b9a5953 100755 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -183,9 +183,6 @@ class SmartSwitchModuleConfigUpdater(logger.Logger): super(SmartSwitchModuleConfigUpdater, self).__init__(log_identifier) self.chassis = chassis - # The threading.Lock is required for concurrent setting of admin_state - # on multiple DPUs for faster bring-up and shutdown. - self.lock = threading.Lock() def deinit(self): """ @@ -203,10 +200,6 @@ class SmartSwitchModuleConfigUpdater(logger.Logger): MODULE_ADMIN_UP, a new thread is started to handle the state change by calling the submit_callback function. - A lock is used to ensure thread safety during the creation of the new thread. The - lock prevents race conditions by ensuring that only one thread can modify shared - resources, such as self.chassis, at a time. - Args: key (str): The unique identifier of the module. Must start with the prefix defined by ModuleBase.MODULE_TYPE_DPU. @@ -219,11 +212,6 @@ class SmartSwitchModuleConfigUpdater(logger.Logger): Logs: - Logs an error if the module name is incorrect or if the module index is invalid. - Logs the state change information or a warning for an invalid admin_state. - - Thread Safety: - - A lock is acquired before creating a new thread to prevent concurrent access to - shared resources. If the submit_callback accesses shared data, consider adding - the lock inside that function as well to ensure complete thread safety. """ if not key.startswith(ModuleBase.MODULE_TYPE_DPU): @@ -240,12 +228,8 @@ class SmartSwitchModuleConfigUpdater(logger.Logger): if (admin_state == MODULE_ADMIN_DOWN) or (admin_state == MODULE_ADMIN_UP): self.log_info("Changing module {} to admin {} state".format(key, 'DOWN' if admin_state == MODULE_ADMIN_DOWN else 'UP')) - # Acquire the lock before submitting the callback function - with self.lock: - # Submit the callback function as a separate thread. It is the responsibility - # of the module to handle multiple b2b requests to the same module - t = threading.Thread(target=self.submit_callback, args=(module_index, admin_state)) - t.start() + t = threading.Thread(target=self.submit_callback, args=(module_index, admin_state)) + t.start() else: self.log_warning("Invalid admin_state value: {}".format(admin_state)) @@ -913,9 +897,12 @@ class ChassisdDaemon(daemon_base.DaemonBase): else: self.log_warning("Caught unhandled signal '{}' - ignoring...".format(SIGNALS_TO_NAMES_DICT[sig])) + def platform_set_admin_cb(self, module_index, admin_state): + try_get(self.module_updater.chassis.get_module(module_index).set_admin_state, admin_state, default=False) + pass + def set_initial_dpu_admin_state(self): """Send admin_state trigger once to modules those are powered up""" - lock = threading.Lock() threads = [] for module_index in range(0, self.module_updater.num_modules): op = None @@ -943,20 +930,7 @@ class ChassisdDaemon(daemon_base.DaemonBase): op = MODULE_ADMIN_UP if op is not None: - # Spawn a thread to handle the DPU logic - def handle_dpu_logic(): - try: - with lock: - module = self.module_updater.chassis.get_module(module_index) - if module: - module.set_admin_state(op) - else: - self.log_warning(f"Module {module_index} not found") - except Exception as e: - self.log_error(f"Error in setting admin state for module {module_index}: {str(e)}", exc_info=True) - - # Create and start a thread for the DPU logic - thread = threading.Thread(target=handle_dpu_logic) + thread = threading.Thread(target=self.platform_set_admin_cb, args=(module_index, op)) thread.daemon = True # Set as a daemon thread thread.start() threads.append(thread) From 54accfe0dd743a1ee2d0754ac7fd329e7e96929b Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 22 Oct 2024 16:13:46 -0700 Subject: [PATCH 087/100] Added support to persist reboot-cause, user defined reboot timeout, chassisd db clean up on admin_down state, removed the thread locks --- sonic-chassisd/scripts/chassisd | 89 ++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 40 deletions(-) diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index 02b9a5953..b0edb62b4 100755 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -15,6 +15,7 @@ try: import sys import threading import time + import json from sonic_py_common import daemon_base, logger, device_info from sonic_py_common.task_base import ProcessTaskBase @@ -73,8 +74,9 @@ CHASSIS_MODULE_REBOOT_INFO_TABLE = 'CHASSIS_MODULE_REBOOT_INFO_TABLE' CHASSIS_MODULE_REBOOT_TIMESTAMP_FIELD = 'timestamp' CHASSIS_MODULE_REBOOT_REBOOT_FIELD = 'reboot' DEFAULT_LINECARD_REBOOT_TIMEOUT = 180 -DEFAULT_DPU_REBOOT_TIMEOUT = 180 +DEFAULT_DPU_REBOOT_TIMEOUT = 360 PLATFORM_ENV_CONF_FILE = "/usr/share/sonic/platform/platform_env.conf" +PLATFORM_JSON_FILE = "/usr/share/sonic/platform/platform.json" CHASSIS_INFO_UPDATE_PERIOD_SECS = 10 CHASSIS_DB_CLEANUP_MODULE_DOWN_PERIOD = 30 # Minutes @@ -191,29 +193,6 @@ class SmartSwitchModuleConfigUpdater(logger.Logger): """ def module_config_update(self, key, admin_state): - """ - Updates the administrative state of a module based on the provided key and state. - - This function first validates the module key to ensure it starts with the expected - prefix (ModuleBase.MODULE_TYPE_DPU) and retrieves the corresponding module index. - If the index is valid, and the desired admin state is either MODULE_ADMIN_DOWN or - MODULE_ADMIN_UP, a new thread is started to handle the state change by calling the - submit_callback function. - - Args: - key (str): The unique identifier of the module. Must start with the prefix - defined by ModuleBase.MODULE_TYPE_DPU. - admin_state (str): The desired administrative state of the module. Can be either - MODULE_ADMIN_DOWN or MODULE_ADMIN_UP. - - Returns: - None - - Logs: - - Logs an error if the module name is incorrect or if the module index is invalid. - - Logs the state change information or a warning for an invalid admin_state. - """ - if not key.startswith(ModuleBase.MODULE_TYPE_DPU): self.log_error("Incorrect module-name {}. Should start with {}".format(key, ModuleBase.MODULE_TYPE_DPU)) @@ -385,18 +364,19 @@ class ModuleUpdater(logger.Logger): # identifying module operational status change. But the clean up will not be attempted for supervisor if down_module_key not in self.down_modules: - self.log_warning("Module {} went off-line!".format(key)) + self.log_warning("Module {} (Slot {}) went off-line!".format(key, module_info_dict[CHASSIS_MODULE_INFO_SLOT_FIELD])) self.down_modules[down_module_key] = {} self.down_modules[down_module_key]['down_time'] = time.time() self.down_modules[down_module_key]['cleaned'] = False + self.down_modules[down_module_key]['slot'] = module_info_dict[CHASSIS_MODULE_INFO_SLOT_FIELD] continue else: # Module is operational. Remove it from down time tracking. if down_module_key in self.down_modules: - self.log_notice("Module {} recovered on-line!".format(key)) + self.log_notice("Module {} (Slot {}) recovered on-line!".format(key, module_info_dict[CHASSIS_MODULE_INFO_SLOT_FIELD])) del self.down_modules[down_module_key] elif prev_status != ModuleBase.MODULE_STATUS_ONLINE: - self.log_notice("Module {} is on-line!".format(key)) + self.log_notice("Module {} (Slot {}) is on-line!".format(key, module_info_dict[CHASSIS_MODULE_INFO_SLOT_FIELD] )) module_cfg_status = self.get_module_admin_status(key) @@ -522,17 +502,17 @@ class ModuleUpdater(logger.Logger): if midplane_access is False and current_midplane_state == 'True': if self.is_module_reboot_expected(module_key): self.module_reboot_set_time(module_key) - self.log_warning("Expected: Module {} lost midplane connectivity".format(module_key)) + self.log_warning("Expected: Module {} (Slot {}) lost midplane connectivity".format(module_key, module.get_slot())) else: - self.log_warning("Unexpected: Module {} lost midplane connectivity".format(module_key)) + self.log_warning("Unexpected: Module {} (Slot {}) lost midplane connectivity".format(module_key, module.get_slot())) elif midplane_access is True and current_midplane_state == 'False': - self.log_notice("Module {} midplane connectivity is up".format(module_key)) + self.log_notice("Module {} (Slot {}) midplane connectivity is up".format(module_key, module.get_slot())) # clean up the reboot_info_table if self.module_reboot_table.get(module_key) is not None: self.module_reboot_table._del(module_key) elif midplane_access is False and current_midplane_state == 'False': if self.is_module_reboot_system_up_expired(module_key): - self.log_warning("Unexpected: Module {} midplane connectivity is not restored in {} seconds".format(module_key, self.linecard_reboot_timeout)) + self.log_warning("Unexpected: Module {} (Slot {}) midplane connectivity is not restored in {} seconds".format(module_key, module.get_slot(), self.linecard_reboot_timeout)) # Update db with midplane information fvs = swsscommon.FieldValuePairs([(CHASSIS_MIDPLANE_INFO_IP_FIELD, midplane_ip), @@ -619,11 +599,12 @@ class ModuleUpdater(logger.Logger): for module in self.down_modules: if self.down_modules[module]['cleaned'] == False: down_time = self.down_modules[module]['down_time'] + slot = self.down_modules[module]['slot'] delta = (time_now - down_time) / 60 if delta >= CHASSIS_DB_CLEANUP_MODULE_DOWN_PERIOD: if module.startswith(ModuleBase.MODULE_TYPE_LINE): # Module is down for more than 30 minutes. Do the chassis clean up - self.log_notice("Module {} is down for long time. Initiating chassis app db clean up".format(module)) + self.log_notice("Module {} (Slot {}) is down for long time. Initiating chassis app db clean up".format(module, slot)) self._cleanup_chassis_app_db(module) self.down_modules[module]['cleaned'] = True @@ -634,6 +615,8 @@ class ModuleUpdater(logger.Logger): class SmartSwitchModuleUpdater(ModuleUpdater): + prev_status = [] + def __init__(self, log_identifier, chassis): """ Constructor for ModuleUpdater @@ -665,6 +648,21 @@ class SmartSwitchModuleUpdater(ModuleUpdater): if not self.midplane_initialized: self.log_error("Chassisd midplane intialization failed") + for module_index in range(0, self.num_modules): + self.prev_status.append(ModuleBase.MODULE_STATUS_OFFLINE) + + self.dpu_reboot_timeout = DEFAULT_DPU_REBOOT_TIMEOUT + if os.path.isfile(PLATFORM_JSON_FILE): + try: + with open(PLATFORM_JSON_FILE, 'r') as f: + platform_cfg = json.load(f) + # Extract the "dpu_reboot_timeout" if it exists + self.dpu_reboot_timeout = int(platform_cfg.get("dpu_reboot_timeout", DEFAULT_DPU_REBOOT_TIMEOUT)) + except (json.JSONDecodeError, ValueError) as e: + self.log_error("Error parsing {}: {}".format(PLATFORM_JSON_FILE, e)) + except Exception as e: + self.log_error("Unexpected error: {}".format(e)) + def deinit(self): """ Destructor of ModuleUpdater @@ -682,7 +680,6 @@ class SmartSwitchModuleUpdater(ModuleUpdater): def module_db_update(self): notOnlineModules = [] - for module_index in range(0, self.num_modules): module_info_dict = self._get_module_info(module_index) if module_info_dict is not None: @@ -698,7 +695,16 @@ class SmartSwitchModuleUpdater(ModuleUpdater): module_info_dict[CHASSIS_MODULE_INFO_SLOT_FIELD]), (CHASSIS_MODULE_INFO_OPERSTATUS_FIELD, module_info_dict[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD]), (CHASSIS_MODULE_INFO_SERIAL_FIELD, module_info_dict[CHASSIS_MODULE_INFO_SERIAL_FIELD])]) - prev_status = self.get_module_current_status(key) + current_status = self.get_module_current_status(key) + if self.prev_status[module_index] == ModuleBase.MODULE_STATUS_OFFLINE: + if current_status != ModuleBase.MODULE_STATUS_OFFLINE: + # DPU module has been turned on, update the reboot-cause + # If the reboot-cause file not already present save it + # If the STATE_DB REBOOT_CAUSE table already doesn't have + # an entry for the recent reboot update the DB + reboot_cause = try_get(self.chassis.get_module(module_index).get_reboot_cause) + + self.prev_status[module_index] = current_status self.module_table.set(key, fvs) def _get_module_info(self, module_index): @@ -759,7 +765,7 @@ class SmartSwitchModuleUpdater(ModuleUpdater): self.module_reboot_table._del(module_key) elif midplane_access is False and current_midplane_state == 'False': if self.is_module_reboot_system_up_expired(module_key): - self.log_warning("Unexpected: Module {} midplane connectivity is not restored in {} seconds".format(module_key, DEFAULT_DPU_REBOOT_TIMEOUT)) + self.log_warning("Unexpected: Module {} midplane connectivity is not restored in {} seconds".format(module_key, self.dpu_reboot_timeout)) # Update db with midplane information fvs = swsscommon.FieldValuePairs([(CHASSIS_MIDPLANE_INFO_IP_FIELD, midplane_ip), @@ -772,10 +778,12 @@ class SmartSwitchModuleUpdater(ModuleUpdater): for module_index in range(0, self.num_modules): name = try_get(self.chassis.get_module(module_index).get_name) - + pattern = "*" + name + "*" if self.get_module_admin_status(name) != 'up': - key = "*|" + name - chassis_state_db.delete(key) + keys = chassis_state_db.keys(pattern) + if keys: + for key in keys: + chassis_state_db.delete(key) return @@ -897,7 +905,7 @@ class ChassisdDaemon(daemon_base.DaemonBase): else: self.log_warning("Caught unhandled signal '{}' - ignoring...".format(SIGNALS_TO_NAMES_DICT[sig])) - def platform_set_admin_cb(self, module_index, admin_state): + def submit_dpu_callback(self, module_index, admin_state): try_get(self.module_updater.chassis.get_module(module_index).set_admin_state, admin_state, default=False) pass @@ -930,7 +938,8 @@ class ChassisdDaemon(daemon_base.DaemonBase): op = MODULE_ADMIN_UP if op is not None: - thread = threading.Thread(target=self.platform_set_admin_cb, args=(module_index, op)) + # Create and start a thread for the DPU logic + thread = threading.Thread(target=self.submit_dpu_callback, args=(module_index, op)) thread.daemon = True # Set as a daemon thread thread.start() threads.append(thread) From 466f6d333da5a7069713e07116c8abb50a680a11 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Wed, 23 Oct 2024 07:16:45 -0700 Subject: [PATCH 088/100] Added get_reboot_cause() to mock_platform.py --- sonic-chassisd/tests/mock_platform.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sonic-chassisd/tests/mock_platform.py b/sonic-chassisd/tests/mock_platform.py index 7f8ed6726..a2a86b4a9 100644 --- a/sonic-chassisd/tests/mock_platform.py +++ b/sonic-chassisd/tests/mock_platform.py @@ -75,6 +75,17 @@ def set_midplane_reachable(self, up): def get_all_asics(self): return self.asic_list + def get_reboot_cause(self): + reboot_cause = { + 'name': '2024_10_19_01_17_18', + 'cause': 'reboot', + 'comment': 'N/A', + 'time': 'N/A', + 'device': 'DPU0', + 'user': 'bla' + } + return reboot_cause + def get_serial(self): return self.module_serial From 43e6b6143646853191a92b6a13561fb40d5e9914 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 29 Oct 2024 15:36:47 -0700 Subject: [PATCH 089/100] Added the necessary changes for dark-mode dpu initial admin status, seamless migration to lightup mode, dpu_state db update when midplane fails, persisting dpu reboot cause etc --- sonic-chassisd/scripts/chassisd | 222 ++++++++++++++++++++++++++++---- 1 file changed, 194 insertions(+), 28 deletions(-) diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index b0edb62b4..aa4676e21 100755 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -16,6 +16,7 @@ try: import threading import time import json + from datetime import datetime from sonic_py_common import daemon_base, logger, device_info from sonic_py_common.task_base import ProcessTaskBase @@ -96,6 +97,8 @@ INVALID_IP = '0.0.0.0' CHASSIS_MODULE_ADMIN_STATUS = 'admin_status' MODULE_ADMIN_DOWN = 0 MODULE_ADMIN_UP = 1 +REBOOT_CAUSE_DIR = "/host/reboot-cause/module/" +MAX_HISTORY_FILES = 10 # This daemon should return non-zero exit code so that supervisord will # restart it automatically. @@ -615,8 +618,6 @@ class ModuleUpdater(logger.Logger): class SmartSwitchModuleUpdater(ModuleUpdater): - prev_status = [] - def __init__(self, log_identifier, chassis): """ Constructor for ModuleUpdater @@ -648,9 +649,6 @@ class SmartSwitchModuleUpdater(ModuleUpdater): if not self.midplane_initialized: self.log_error("Chassisd midplane intialization failed") - for module_index in range(0, self.num_modules): - self.prev_status.append(ModuleBase.MODULE_STATUS_OFFLINE) - self.dpu_reboot_timeout = DEFAULT_DPU_REBOOT_TIMEOUT if os.path.isfile(PLATFORM_JSON_FILE): try: @@ -678,6 +676,16 @@ class SmartSwitchModuleUpdater(ModuleUpdater): if self.chassis_table is not None: self.chassis_table._del(CHASSIS_INFO_KEY_TEMPLATE.format(1)) + def get_module_admin_status(self, chassis_module_name): + config_db = daemon_base.db_connect("CONFIG_DB") + vtable = swsscommon.Table(config_db, CHASSIS_CFG_TABLE) + fvs = vtable.get(chassis_module_name) + if isinstance(fvs, list) and fvs[0] is True: + fvs = dict(fvs[-1]) + return fvs[CHASSIS_MODULE_ADMIN_STATUS] + else: + return 'down' + def module_db_update(self): notOnlineModules = [] for module_index in range(0, self.num_modules): @@ -695,16 +703,6 @@ class SmartSwitchModuleUpdater(ModuleUpdater): module_info_dict[CHASSIS_MODULE_INFO_SLOT_FIELD]), (CHASSIS_MODULE_INFO_OPERSTATUS_FIELD, module_info_dict[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD]), (CHASSIS_MODULE_INFO_SERIAL_FIELD, module_info_dict[CHASSIS_MODULE_INFO_SERIAL_FIELD])]) - current_status = self.get_module_current_status(key) - if self.prev_status[module_index] == ModuleBase.MODULE_STATUS_OFFLINE: - if current_status != ModuleBase.MODULE_STATUS_OFFLINE: - # DPU module has been turned on, update the reboot-cause - # If the reboot-cause file not already present save it - # If the STATE_DB REBOOT_CAUSE table already doesn't have - # an entry for the recent reboot update the DB - reboot_cause = try_get(self.chassis.get_module(module_index).get_reboot_cause) - - self.prev_status[module_index] = current_status self.module_table.set(key, fvs) def _get_module_info(self, module_index): @@ -732,6 +730,158 @@ class SmartSwitchModuleUpdater(ModuleUpdater): def _is_supervisor(self): return False + def update_dpu_state(self, key): + """ + Update DPU state in chassisStateDB using the given key. + """ + try: + # Connect to the CHASSIS_STATE_DB using daemon_base + self.chassis_state_db = daemon_base.db_connect("CHASSIS_STATE_DB") + + # Fetch the current data for the given key and convert it to a dict + current_data = self._convert_to_dict(self.chassis_state_db.hgetall(key)) + + if current_data: + self.chassis_state_db.delete(key) + + # Prepare the updated data + updates = { + "dpu_midplane_link_state": "DOWN", + "dpu_midplane_link_reason": "unknown", + "dpu_midplane_link_time": datetime.now().strftime("%Y%m%d %H:%M:%S"), + } + current_data.update(updates) + + for field, value in current_data.items(): + self.chassis_state_db.hset(key, field, value) + + except Exception as e: + self.log_error(f"Unexpected error: {e}") + + def _convert_to_dict(self, data): + """ + Converts SWIG proxy object or native dict to a Python dictionary. + """ + if isinstance(data, dict): + return data # Already a dict, return as-is + else: + return dict(data) # Convert SWIG proxy object to dict + + def _get_current_time_str(self): + """Returns the current time as a string in 'YYYY_MM_DD_HH_MM_SS' format.""" + return datetime.now().strftime("%Y_%m_%d_%H_%M_%S") + + def _get_history_path(self, module, file_name): + """Generates the full path for history files.""" + return os.path.join(REBOOT_CAUSE_DIR, module.lower(), "history", file_name) + + def _is_first_boot(self, module): + """Checks if the reboot-cause file indicates a first boot.""" + file_path = os.path.join(REBOOT_CAUSE_DIR, module.lower(), "reboot-cause.txt") + + try: + with open(file_path, 'r') as f: + content = f.read().strip() + return content == "First boot" + except FileNotFoundError: + return False + + def persist_dpu_reboot_time(self, module): + """Persist the current reboot time to a file.""" + time_str = self._get_current_time_str() + path = self._get_history_path(module, "prev_reboot_time.txt") + + os.makedirs(os.path.dirname(path), exist_ok=True) + with open(path, 'w') as f: + f.write(time_str) + + def retrieve_dpu_reboot_time(self, module): + """Retrieve the persisted reboot time from a file.""" + path = self._get_history_path(module, "prev_reboot_time.txt") + + try: + with open(path, 'r') as f: + return f.read().strip() + except FileNotFoundError: + return None + + def persist_dpu_reboot_cause(self, reboot_cause, module): + """Persist the reboot cause information and handle file rotation.""" + prev_reboot_time = self.retrieve_dpu_reboot_time(module) + if prev_reboot_time is None: + prev_reboot_time = self._get_current_time_str() + + file_name = f"{prev_reboot_time}_reboot_cause.txt" + prev_reboot_path = self._get_history_path(module, "prev_reboot_time.txt") + + if os.path.exists(prev_reboot_path): + os.remove(prev_reboot_path) + + file_path = self._get_history_path(module, file_name) + dt_obj = datetime.strptime(prev_reboot_time, "%Y_%m_%d_%H_%M_%S") + formatted_time = dt_obj.strftime("%a %b %d %I:%M:%S %p UTC %Y") + + reboot_cause_dict = { + "cause": reboot_cause, + "comment": "N/A", + "device": module, + "time": dt_obj.strftime("%Y-%m-%d %H:%M:%S"), + "name": prev_reboot_time, + } + + with open(file_path, 'w') as f: + json.dump(reboot_cause_dict, f) + + # Write the reboot_cause content to the reboot-cause.txt file, overwriting it + reboot_cause_path = os.path.join(REBOOT_CAUSE_DIR, module.lower(), "reboot-cause.txt") + os.makedirs(os.path.dirname(reboot_cause_path), exist_ok=True) + with open(reboot_cause_path, 'w') as cause_file: + cause_file.write(reboot_cause + '\n') + + # Update symlink to the latest reboot cause file + symlink_path = os.path.join(REBOOT_CAUSE_DIR, module.lower(), "previous-reboot-cause.json") + if os.path.exists(symlink_path): + os.remove(symlink_path) + os.symlink(file_path, symlink_path) + + # Perform file rotation if necessary + self._rotate_files(module) + + def _rotate_files(self, module): + """Rotate history files if they exceed the maximum limit.""" + history_dir = os.path.join(REBOOT_CAUSE_DIR, module.lower(), "history") + files = sorted(os.listdir(history_dir)) + + if len(files) > MAX_HISTORY_FILES: + for old_file in files[:-MAX_HISTORY_FILES]: + os.remove(os.path.join(history_dir, old_file)) + + def update_dpu_reboot_cause_to_db(self, module): + """Update the reboot cause in CHASSIS_STATE_DB.""" + reboot_cause_dict = self.retrieve_latest_reboot_cause(module) + if not reboot_cause_dict: + raise ValueError(f"No reboot cause data found for module: {module}") + + reboot_time = reboot_cause_dict.get("name", self._get_current_time_str()) + key = f"REBOOT_CAUSE|{module.upper()}|{reboot_time}" + + if not self.chassis_state_db: + self.chassis_state_db = ConfigDBConnector() + self.chassis_state_db.connect("CHASSIS_STATE_DB") + + # Use hset to store the updated data in one call + for field, value in reboot_cause_dict.items(): + self.chassis_state_db.hset(key, field, value) + + def retrieve_latest_reboot_cause(self, module): + """Retrieve the most recent reboot cause file content.""" + symlink_path = os.path.join(REBOOT_CAUSE_DIR, module.lower(), "previous-reboot-cause.json") + try: + with open(symlink_path, 'r') as f: + return json.load(f) + except (FileNotFoundError, json.JSONDecodeError): + return None + def check_midplane_reachability(self): if not self.midplane_initialized: return @@ -743,6 +893,7 @@ class SmartSwitchModuleUpdater(ModuleUpdater): module_key = try_get(module.get_name, default='MODULE {}'.format(index)) midplane_ip = try_get(module.get_midplane_ip, default=INVALID_IP) midplane_access = try_get(module.is_midplane_reachable, default=False) + dpu_admin_state = self.get_module_admin_status(module_key) # Generate syslog for the loss of midplane connectivity when midplane connectivity # loss is detected for the first time @@ -753,19 +904,34 @@ class SmartSwitchModuleUpdater(ModuleUpdater): current_midplane_state = fvs[CHASSIS_MIDPLANE_INFO_ACCESS_FIELD] if midplane_access is False and current_midplane_state == 'True': - if self.is_module_reboot_expected(module_key): - self.module_reboot_set_time(module_key) - self.log_warning("Expected: Module {} lost midplane connectivity".format(module_key)) - else: - self.log_warning("Unexpected: Module {} lost midplane connectivity".format(module_key)) + self.log_warning("Unexpected: Module {} lost midplane connectivity".format(module_key)) + + # Update midplane state in the chassisStateDB DPU_STATE table + key = "DPU_STATE|" + module_key + self.update_dpu_state(key) + + # Persist dpu down time + self.persist_dpu_reboot_time(module_key) + elif midplane_access is True and current_midplane_state == 'False': self.log_notice("Module {} midplane connectivity is up".format(module_key)) - # clean up the reboot_info_table - if self.module_reboot_table.get(module_key) is not None: - self.module_reboot_table._del(module_key) + reboot_cause = try_get(self.chassis.get_module(index).get_reboot_cause) + + if not self.retrieve_dpu_reboot_time(module_key) is None or self._is_first_boot(module_key): + # persist reboot cause + self.persist_dpu_reboot_cause(reboot_cause, module_key) + + # update reboot cause to db + time = self.retrieve_dpu_reboot_time(module_key) + if time is None: + time = self._get_current_time_str() + + key = "REBOOT_CAUSE|" + module_key + "|" + time + self.update_dpu_reboot_cause_to_db(module_key) + elif midplane_access is False and current_midplane_state == 'False': if self.is_module_reboot_system_up_expired(module_key): - self.log_warning("Unexpected: Module {} midplane connectivity is not restored in {} seconds".format(module_key, self.dpu_reboot_timeout)) + self.log_warning("Unexpected: Module {} midplane connectivity is not restored in {} seconds".format(module_key, self.linecard_reboot_timeout)) # Update db with midplane information fvs = swsscommon.FieldValuePairs([(CHASSIS_MIDPLANE_INFO_IP_FIELD, midplane_ip), @@ -930,12 +1096,12 @@ class ChassisdDaemon(daemon_base.DaemonBase): # Get admin state of DPU key = module_info_dict[CHASSIS_MODULE_INFO_NAME_FIELD] admin_state = self.module_updater.get_module_admin_status(key) - if admin_state == 'down' and midplane_state == 'True': - # shutdown DPU - op = MODULE_ADMIN_DOWN - if admin_state != 'down' and midplane_state == 'False': + if admin_state == 'up' and midplane_state == 'False': # startup DPU op = MODULE_ADMIN_UP + elif admin_state != 'up' and midplane_state == 'True': + # shutdown DPU + op = MODULE_ADMIN_DOWN if op is not None: # Create and start a thread for the DPU logic From 5a81fc77202528297c8371dfd7773a54f78b49dd Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 29 Oct 2024 16:23:14 -0700 Subject: [PATCH 090/100] Fixing test issues --- sonic-chassisd/tests/test_chassisd.py | 185 ++++++++++++++------------ 1 file changed, 101 insertions(+), 84 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 004087ab1..d619dbe80 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -1,6 +1,7 @@ import os import sys import mock +import tempfile from imp import load_source from mock import Mock, MagicMock, patch @@ -547,90 +548,106 @@ def mock_open(*args, **kwargs): # unpatched version for every other path return builtin_open(*args, **kwargs) -def test_midplane_presence_dpu_modules(): - chassis = MockSmartSwitchChassis() - - #DPU0 - index = 0 - name = "DPU0" - desc = "DPU Module 0" - slot = 0 - sup_slot = 0 - serial = "DPU0-0000" - module_type = ModuleBase.MODULE_TYPE_DPU - module = MockModule(index, name, desc, module_type, slot, serial) - module.set_midplane_ip() - chassis.module_list.append(module) - - #Run on supervisor - module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis) - module_updater.modules_num_update() - module_updater.module_db_update() - module_updater.check_midplane_reachability() - - midplane_table = module_updater.midplane_table - #Check only one entry in database - assert 1 == midplane_table.size() - - #Check fields in database - name = "DPU0" - fvs = midplane_table.get(name) - assert fvs != None - if isinstance(fvs, list): - fvs = dict(fvs[-1]) - assert module.get_midplane_ip() == fvs[CHASSIS_MIDPLANE_INFO_IP_FIELD] - assert str(module.is_midplane_reachable()) == fvs[CHASSIS_MIDPLANE_INFO_ACCESS_FIELD] - - #DPU Down to Up (midplane connectivity is down initially) - module.set_midplane_reachable(True) - module_updater.check_midplane_reachability() - fvs = midplane_table.get(name) - assert fvs != None - if isinstance(fvs, list): - fvs = dict(fvs[-1]) - assert module.get_midplane_ip() == fvs[CHASSIS_MIDPLANE_INFO_IP_FIELD] - assert str(module.is_midplane_reachable()) == fvs[CHASSIS_MIDPLANE_INFO_ACCESS_FIELD] - - #DPU Up to Down (to mock midplane connectivity state change) - module.set_midplane_reachable(False) - module_updater.check_midplane_reachability() - fvs = midplane_table.get(name) - assert fvs != None - if isinstance(fvs, list): - fvs = dict(fvs[-1]) - assert module.get_midplane_ip() == fvs[CHASSIS_MIDPLANE_INFO_IP_FIELD] - assert str(module.is_midplane_reachable()) == fvs[CHASSIS_MIDPLANE_INFO_ACCESS_FIELD] - - #Deinit - module_updater.deinit() - fvs = midplane_table.get(name) - assert fvs == None - -def test_midplane_presence_uninitialized_dpu_modules(): - chassis = MockSmartSwitchChassis() - - #DPU0 - index = 0 - name = "DPU0" - desc = "DPU Module 0" - slot = 0 - sup_slot = 0 - serial = "DPU0-0000" - module_type = ModuleBase.MODULE_TYPE_DPU - module = MockModule(index, name, desc, module_type, slot, serial) - module.set_midplane_ip() - chassis.module_list.append(module) - - #Run on supervisor - module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis) - module_updater.midplane_initialized = False - module_updater.modules_num_update() - module_updater.module_db_update() - module_updater.check_midplane_reachability() - - midplane_table = module_updater.midplane_table - #Check only one entry in database - assert 1 != midplane_table.size() +@patch('os.makedirs') +def test_midplane_presence_dpu_modules(mock_makedirs): + with tempfile.TemporaryDirectory() as temp_dir: + # Assume your method uses a path variable that you can set for testing + path = os.path.join(temp_dir, 'subdir') + + # Set up your mock or variable to use temp_dir + mock_makedirs.side_effect = lambda x, **kwargs: None # Prevent actual call + + chassis = MockSmartSwitchChassis() + + #DPU0 + index = 0 + name = "DPU0" + desc = "DPU Module 0" + slot = 0 + sup_slot = 0 + serial = "DPU0-0000" + module_type = ModuleBase.MODULE_TYPE_DPU + module = MockModule(index, name, desc, module_type, slot, serial) + module.set_midplane_ip() + chassis.module_list.append(module) + + #Run on supervisor + module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis) + module_updater.modules_num_update() + module_updater.module_db_update() + module_updater.check_midplane_reachability() + + midplane_table = module_updater.midplane_table + #Check only one entry in database + assert 1 == midplane_table.size() + + #Check fields in database + name = "DPU0" + fvs = midplane_table.get(name) + assert fvs != None + if isinstance(fvs, list): + fvs = dict(fvs[-1]) + assert module.get_midplane_ip() == fvs[CHASSIS_MIDPLANE_INFO_IP_FIELD] + assert str(module.is_midplane_reachable()) == fvs[CHASSIS_MIDPLANE_INFO_ACCESS_FIELD] + + #DPU Down to Up (midplane connectivity is down initially) + module.set_midplane_reachable(True) + module_updater.check_midplane_reachability() + fvs = midplane_table.get(name) + assert fvs != None + if isinstance(fvs, list): + fvs = dict(fvs[-1]) + assert module.get_midplane_ip() == fvs[CHASSIS_MIDPLANE_INFO_IP_FIELD] + assert str(module.is_midplane_reachable()) == fvs[CHASSIS_MIDPLANE_INFO_ACCESS_FIELD] + + #DPU Up to Down (to mock midplane connectivity state change) + module.set_midplane_reachable(False) + module_updater.check_midplane_reachability() + fvs = midplane_table.get(name) + assert fvs != None + if isinstance(fvs, list): + fvs = dict(fvs[-1]) + assert module.get_midplane_ip() == fvs[CHASSIS_MIDPLANE_INFO_IP_FIELD] + assert str(module.is_midplane_reachable()) == fvs[CHASSIS_MIDPLANE_INFO_ACCESS_FIELD] + + #Deinit + module_updater.deinit() + fvs = midplane_table.get(name) + assert fvs == None + +@patch('os.makedirs') +def test_midplane_presence_uninitialized_dpu_modules(mock_makedirs): + with tempfile.TemporaryDirectory() as temp_dir: + # Assume your method uses a path variable that you can set for testing + path = os.path.join(temp_dir, 'subdir') + + # Set up your mock or variable to use temp_dir + mock_makedirs.side_effect = lambda x, **kwargs: None # Prevent actual call + + chassis = MockSmartSwitchChassis() + + #DPU0 + index = 0 + name = "DPU0" + desc = "DPU Module 0" + slot = 0 + sup_slot = 0 + serial = "DPU0-0000" + module_type = ModuleBase.MODULE_TYPE_DPU + module = MockModule(index, name, desc, module_type, slot, serial) + module.set_midplane_ip() + chassis.module_list.append(module) + + #Run on supervisor + module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis) + module_updater.midplane_initialized = False + module_updater.modules_num_update() + module_updater.module_db_update() + module_updater.check_midplane_reachability() + + midplane_table = module_updater.midplane_table + #Check only one entry in database + assert 1 != midplane_table.size() @patch("builtins.open", mock_open) @patch('os.path.isfile', MagicMock(return_value=True)) From 53ce3a6c97df00e94c63a9eeb17af400b7e04f00 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 29 Oct 2024 18:11:25 -0700 Subject: [PATCH 091/100] Added a mock for file open --- sonic-chassisd/tests/test_chassisd.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index d619dbe80..e305ca8a6 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -4,7 +4,7 @@ import tempfile from imp import load_source -from mock import Mock, MagicMock, patch +from mock import Mock, MagicMock, patch, mock_open from sonic_py_common import daemon_base from .mock_platform import MockChassis, MockSmartSwitchChassis, MockModule @@ -549,7 +549,8 @@ def mock_open(*args, **kwargs): return builtin_open(*args, **kwargs) @patch('os.makedirs') -def test_midplane_presence_dpu_modules(mock_makedirs): +@patch('builtins.open', new_callable=mock_open) +def test_midplane_presence_dpu_modules(mock_open, mock_makedirs): with tempfile.TemporaryDirectory() as temp_dir: # Assume your method uses a path variable that you can set for testing path = os.path.join(temp_dir, 'subdir') @@ -616,7 +617,8 @@ def test_midplane_presence_dpu_modules(mock_makedirs): assert fvs == None @patch('os.makedirs') -def test_midplane_presence_uninitialized_dpu_modules(mock_makedirs): +@patch('builtins.open', new_callable=mock_open) +def test_midplane_presence_uninitialized_dpu_modules(mock_open, mock_makedirs): with tempfile.TemporaryDirectory() as temp_dir: # Assume your method uses a path variable that you can set for testing path = os.path.join(temp_dir, 'subdir') From 094c0ad4f3593bff5b399c9b1df0ff95f777b0f3 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 29 Oct 2024 19:26:41 -0700 Subject: [PATCH 092/100] working on test coverage --- sonic-chassisd/tests/test_chassisd.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index e305ca8a6..32232842a 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -6,6 +6,7 @@ from mock import Mock, MagicMock, patch, mock_open from sonic_py_common import daemon_base +from mock import mock_open as dpu_mock_open from .mock_platform import MockChassis, MockSmartSwitchChassis, MockModule from .mock_module_base import ModuleBase @@ -541,16 +542,9 @@ def test_midplane_presence_modules(): fvs = midplane_table.get(name) assert fvs == None -builtin_open = open # save the unpatched version -def mock_open(*args, **kwargs): - if args[0] == PLATFORM_ENV_CONF_FILE: - return mock.mock_open(read_data="dummy=1\nlinecard_reboot_timeout=240\n")(*args, **kwargs) - # unpatched version for every other path - return builtin_open(*args, **kwargs) - @patch('os.makedirs') -@patch('builtins.open', new_callable=mock_open) -def test_midplane_presence_dpu_modules(mock_open, mock_makedirs): +@patch('builtins.open', new_callable=dpu_mock_open) +def test_midplane_presence_dpu_modules(dpu_mock_open, mock_makedirs): with tempfile.TemporaryDirectory() as temp_dir: # Assume your method uses a path variable that you can set for testing path = os.path.join(temp_dir, 'subdir') @@ -617,8 +611,8 @@ def test_midplane_presence_dpu_modules(mock_open, mock_makedirs): assert fvs == None @patch('os.makedirs') -@patch('builtins.open', new_callable=mock_open) -def test_midplane_presence_uninitialized_dpu_modules(mock_open, mock_makedirs): +@patch('builtins.open', new_callable=dpu_mock_open) +def test_midplane_presence_uninitialized_dpu_modules(dpu_mock_open, mock_makedirs): with tempfile.TemporaryDirectory() as temp_dir: # Assume your method uses a path variable that you can set for testing path = os.path.join(temp_dir, 'subdir') @@ -651,6 +645,13 @@ def test_midplane_presence_uninitialized_dpu_modules(mock_open, mock_makedirs): #Check only one entry in database assert 1 != midplane_table.size() +builtin_open = open # save the unpatched version +def mock_open(*args, **kwargs): + if args[0] == PLATFORM_ENV_CONF_FILE: + return mock.mock_open(read_data="dummy=1\nlinecard_reboot_timeout=240\n")(*args, **kwargs) + # unpatched version for every other path + return builtin_open(*args, **kwargs) + @patch("builtins.open", mock_open) @patch('os.path.isfile', MagicMock(return_value=True)) def test_midplane_presence_modules_linecard_reboot(): From 55fd67894cab47da7b9afc5525fa8904e2107325 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 29 Oct 2024 19:56:29 -0700 Subject: [PATCH 093/100] Initialized previous reboot time --- sonic-chassisd/tests/test_chassisd.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 32232842a..fb9d4ee85 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -564,6 +564,7 @@ def test_midplane_presence_dpu_modules(dpu_mock_open, mock_makedirs): module_type = ModuleBase.MODULE_TYPE_DPU module = MockModule(index, name, desc, module_type, slot, serial) module.set_midplane_ip() + module.prev_reboot_time = "2024_10_30_02_44_50" chassis.module_list.append(module) #Run on supervisor @@ -632,6 +633,7 @@ def test_midplane_presence_uninitialized_dpu_modules(dpu_mock_open, mock_makedir module_type = ModuleBase.MODULE_TYPE_DPU module = MockModule(index, name, desc, module_type, slot, serial) module.set_midplane_ip() + module.prev_reboot_time = "2024_10_30_02_44_50" chassis.module_list.append(module) #Run on supervisor From c42427156fe4b79ae74449b3bfff1f245e206bcd Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 29 Oct 2024 20:16:18 -0700 Subject: [PATCH 094/100] Fixed time format --- sonic-chassisd/scripts/chassisd | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index aa4676e21..0d956d46c 100755 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -818,14 +818,18 @@ class SmartSwitchModuleUpdater(ModuleUpdater): os.remove(prev_reboot_path) file_path = self._get_history_path(module, file_name) - dt_obj = datetime.strptime(prev_reboot_time, "%Y_%m_%d_%H_%M_%S") + try: + dt_obj = datetime.strptime(prev_reboot_time, "%Y_%m_%d_%H_%M_%S") + except ValueError: + dt_obj = datetime.now() + formatted_time = dt_obj.strftime("%a %b %d %I:%M:%S %p UTC %Y") reboot_cause_dict = { "cause": reboot_cause, "comment": "N/A", "device": module, - "time": dt_obj.strftime("%Y-%m-%d %H:%M:%S"), + "time": formatted_time, "name": prev_reboot_time, } From 90e982572fbbe7028dcc91085c417aab8ad18558 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 29 Oct 2024 20:36:30 -0700 Subject: [PATCH 095/100] For some reason the test thinks that "reboot_cause" is a dict. So converting it to string format --- sonic-chassisd/scripts/chassisd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index 0d956d46c..a62d7653c 100755 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -840,7 +840,7 @@ class SmartSwitchModuleUpdater(ModuleUpdater): reboot_cause_path = os.path.join(REBOOT_CAUSE_DIR, module.lower(), "reboot-cause.txt") os.makedirs(os.path.dirname(reboot_cause_path), exist_ok=True) with open(reboot_cause_path, 'w') as cause_file: - cause_file.write(reboot_cause + '\n') + cause_file.write(json.dumps(reboot_cause) + '\n') # Update symlink to the latest reboot cause file symlink_path = os.path.join(REBOOT_CAUSE_DIR, module.lower(), "previous-reboot-cause.json") From ee11414d3e230c300a784821140523fb79d9c034 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 29 Oct 2024 22:00:24 -0700 Subject: [PATCH 096/100] checking if the file_path exists before creating a new symlink --- sonic-chassisd/scripts/chassisd | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index a62d7653c..9b0873b30 100755 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -846,7 +846,8 @@ class SmartSwitchModuleUpdater(ModuleUpdater): symlink_path = os.path.join(REBOOT_CAUSE_DIR, module.lower(), "previous-reboot-cause.json") if os.path.exists(symlink_path): os.remove(symlink_path) - os.symlink(file_path, symlink_path) + if os.path.exists(file_path): + os.symlink(file_path, symlink_path) # Perform file rotation if necessary self._rotate_files(module) From 153962bcc767d049431650fcfeffa3043bd0c1fa Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 29 Oct 2024 22:24:58 -0700 Subject: [PATCH 097/100] Adding some error handling --- sonic-chassisd/scripts/chassisd | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index 9b0873b30..697e1895c 100755 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -855,8 +855,12 @@ class SmartSwitchModuleUpdater(ModuleUpdater): def _rotate_files(self, module): """Rotate history files if they exceed the maximum limit.""" history_dir = os.path.join(REBOOT_CAUSE_DIR, module.lower(), "history") + os.makedirs(history_dir, exist_ok=True) files = sorted(os.listdir(history_dir)) + if not files: + return + if len(files) > MAX_HISTORY_FILES: for old_file in files[:-MAX_HISTORY_FILES]: os.remove(os.path.join(history_dir, old_file)) From 465bf315c4ce346eee665c95dfbc10eae51a6896 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 29 Oct 2024 22:43:37 -0700 Subject: [PATCH 098/100] Adding error handling --- sonic-chassisd/scripts/chassisd | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index 697e1895c..211cf1bc4 100755 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -856,7 +856,10 @@ class SmartSwitchModuleUpdater(ModuleUpdater): """Rotate history files if they exceed the maximum limit.""" history_dir = os.path.join(REBOOT_CAUSE_DIR, module.lower(), "history") os.makedirs(history_dir, exist_ok=True) - files = sorted(os.listdir(history_dir)) + try: + files = sorted(os.listdir(history_dir)) + except FileNotFoundError: + return if not files: return From 1de545e1034ca01114daf298f8aa3a129f192f65 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 29 Oct 2024 23:03:55 -0700 Subject: [PATCH 099/100] Adding error handling --- sonic-chassisd/scripts/chassisd | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index 211cf1bc4..eefdf942e 100755 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -872,7 +872,8 @@ class SmartSwitchModuleUpdater(ModuleUpdater): """Update the reboot cause in CHASSIS_STATE_DB.""" reboot_cause_dict = self.retrieve_latest_reboot_cause(module) if not reboot_cause_dict: - raise ValueError(f"No reboot cause data found for module: {module}") + self.log_warning("No reboot cause data found for module:{}".format(module)) + return reboot_time = reboot_cause_dict.get("name", self._get_current_time_str()) key = f"REBOOT_CAUSE|{module.upper()}|{reboot_time}" From d2969bd578d8a6bb508b3d0599e62ad7c7015e5c Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 29 Oct 2024 23:09:56 -0700 Subject: [PATCH 100/100] Adding error handling --- sonic-chassisd/scripts/chassisd | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index eefdf942e..1ccc50920 100755 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -884,7 +884,8 @@ class SmartSwitchModuleUpdater(ModuleUpdater): # Use hset to store the updated data in one call for field, value in reboot_cause_dict.items(): - self.chassis_state_db.hset(key, field, value) + if field and value is not None: + self.chassis_state_db.hset(key, field, value) def retrieve_latest_reboot_cause(self, module): """Retrieve the most recent reboot cause file content."""