Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Start instance monitoring before instance is launched #252

Merged
merged 3 commits into from
Feb 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions include/aos/common/monitoring/monitoring.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,11 @@ struct InstanceMonitoringData {
{
}

InstanceIdent mInstanceIdent = {};
MonitoringData mMonitoringData = {};
uint32_t mUID = 0;
uint32_t mGID = 0;
InstanceIdent mInstanceIdent = {};
MonitoringData mMonitoringData = {};
uint32_t mUID = 0;
uint32_t mGID = 0;
InstanceRunState mRunState = InstanceRunStateEnum::eFailed;

/**
* Compares instance monitoring data.
Expand Down Expand Up @@ -233,6 +234,15 @@ class ResourceMonitorItf {
*/
virtual Error StartInstanceMonitoring(const String& instanceID, const InstanceMonitorParams& monitoringConfig) = 0;

/**
* Updates instance's run state.
*
* @param instanceID instance ID.
* @param runState run state.
* @return Error.
*/
virtual Error UpdateInstanceRunState(const String& instanceID, InstanceRunState runState) = 0;

/**
* Stops instance monitoring.
*
Expand Down
9 changes: 9 additions & 0 deletions include/aos/common/monitoring/resourcemonitor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,15 @@ class ResourceMonitor : public ResourceMonitorItf,
*/
Error StartInstanceMonitoring(const String& instanceID, const InstanceMonitorParams& monitoringConfig) override;

/**
* Updates instance's run state.
*
* @param instanceID instance ID.
* @param runState run state.
* @return Error.
*/
Error UpdateInstanceRunState(const String& instanceID, InstanceRunState runState) override;

/**
* Stops instance monitoring.
*
Expand Down
26 changes: 23 additions & 3 deletions src/common/monitoring/resourcemonitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@

if (err = mResourceUsageProvider->GetInstanceMonitoringData(
instanceID, mInstanceMonitoringData.Find(instanceID)->mSecond);
!err.IsNone()) {
!err.IsNone() && !err.Is(ErrorEnum::eNotFound)) {
LOG_WRN() << "Can't get instance monitoring data: instanceID=" << instanceID << ", err=" << err;
}

Expand All @@ -166,6 +166,22 @@
return ErrorEnum::eNone;
}

/**
 * Updates the run state stored for a monitored instance.
 *
 * Holds mMutex for the whole call, looks the instance up in mInstanceMonitoringData and overwrites
 * the mRunState field of the found entry.
 *
 * @param instanceID instance ID.
 * @param runState run state.
 * @return Error: eNotFound if there is no entry for instanceID, eNone otherwise.
 */
Error ResourceMonitor::UpdateInstanceRunState(const String& instanceID, InstanceRunState runState)

Check warning on line 169 in src/common/monitoring/resourcemonitor.cpp

View check run for this annotation

Codecov / codecov/patch

src/common/monitoring/resourcemonitor.cpp#L169

Added line #L169 was not covered by tests
{
LockGuard lock {mMutex};

Check warning on line 171 in src/common/monitoring/resourcemonitor.cpp

View check run for this annotation

Codecov / codecov/patch

src/common/monitoring/resourcemonitor.cpp#L171

Added line #L171 was not covered by tests

LOG_DBG() << "Update instance run state: instanceID=" << instanceID << ", runState=" << runState;

Check warning on line 173 in src/common/monitoring/resourcemonitor.cpp

View check run for this annotation

Codecov / codecov/patch

src/common/monitoring/resourcemonitor.cpp#L173

Added line #L173 was not covered by tests

auto instanceData = mInstanceMonitoringData.Find(instanceID);

Check warning on line 175 in src/common/monitoring/resourcemonitor.cpp

View check run for this annotation

Codecov / codecov/patch

src/common/monitoring/resourcemonitor.cpp#L175

Added line #L175 was not covered by tests
// No entry for this instance ID: report it to the caller rather than dereferencing end().
if (instanceData == mInstanceMonitoringData.end()) {
return ErrorEnum::eNotFound;

Check warning on line 177 in src/common/monitoring/resourcemonitor.cpp

View check run for this annotation

Codecov / codecov/patch

src/common/monitoring/resourcemonitor.cpp#L177

Added line #L177 was not covered by tests
}

// NOTE(review): the monitoring loop elsewhere in this file appears to log data-retrieval failures
// as errors only when mRunState is eActive — confirm that this is the sole consumer of the field.
instanceData->mSecond.mRunState = runState;

Check warning on line 180 in src/common/monitoring/resourcemonitor.cpp

View check run for this annotation

Codecov / codecov/patch

src/common/monitoring/resourcemonitor.cpp#L180

Added line #L180 was not covered by tests

return ErrorEnum::eNone;
}

Check warning on line 183 in src/common/monitoring/resourcemonitor.cpp

View check run for this annotation

Codecov / codecov/patch

src/common/monitoring/resourcemonitor.cpp#L182-L183

Added lines #L182 - L183 were not covered by tests

Error ResourceMonitor::StopInstanceMonitoring(const String& instanceID)
{
LockGuard lock {mMutex};
Expand Down Expand Up @@ -465,7 +481,7 @@
if (auto err = mResourceUsageProvider->GetNodeMonitoringData(
mNodeMonitoringData.mNodeID, mNodeMonitoringData.mMonitoringData);
!err.IsNone()) {
LOG_ERR() << "Failed to get node monitoring data: " << err;
LOG_ERR() << "Failed to get node monitoring data: err=" << err;

Check warning on line 484 in src/common/monitoring/resourcemonitor.cpp

View check run for this annotation

Codecov / codecov/patch

src/common/monitoring/resourcemonitor.cpp#L484

Added line #L484 was not covered by tests
}

mNodeMonitoringData.mMonitoringData.mCPU = CPUToDMIPs(mNodeMonitoringData.mMonitoringData.mCPU);
Expand All @@ -475,7 +491,11 @@
for (auto& [instanceID, instanceMonitoringData] : mInstanceMonitoringData) {
if (auto err = mResourceUsageProvider->GetInstanceMonitoringData(instanceID, instanceMonitoringData);
!err.IsNone()) {
LOG_ERR() << "Failed to get instance monitoring data: " << err;
if (instanceMonitoringData.mRunState == InstanceRunStateEnum::eActive) {
LOG_ERR() << "Failed to get instance monitoring data: err=" << err;

Check warning on line 495 in src/common/monitoring/resourcemonitor.cpp

View check run for this annotation

Codecov / codecov/patch

src/common/monitoring/resourcemonitor.cpp#L495

Added line #L495 was not covered by tests
}

mlohvynenko marked this conversation as resolved.
Show resolved Hide resolved
continue;

Check warning on line 498 in src/common/monitoring/resourcemonitor.cpp

View check run for this annotation

Codecov / codecov/patch

src/common/monitoring/resourcemonitor.cpp#L498

Added line #L498 was not covered by tests
}

instanceMonitoringData.mMonitoringData.mCPU = CPUToDMIPs(instanceMonitoringData.mMonitoringData.mCPU);
Expand Down
8 changes: 6 additions & 2 deletions src/sm/launcher/instance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,10 @@
return err;
}

if (auto err = SetupMonitoring(); !err.IsNone()) {
return AOS_ERROR_WRAP(err);

Check warning on line 123 in src/sm/launcher/instance.cpp

View check run for this annotation

Codecov / codecov/patch

src/sm/launcher/instance.cpp#L123

Added line #L123 was not covered by tests
}

auto runStatus = mRunner.StartInstance(mInstanceID, mRuntimeDir, {});

mRunState = runStatus.mState;
Expand All @@ -127,8 +131,8 @@
return AOS_ERROR_WRAP(runStatus.mError);
}

if (auto err = SetupMonitoring(); !err.IsNone()) {
return err;
if (auto err = mResourceMonitor.UpdateInstanceRunState(mInstanceID, mRunState); !err.IsNone()) {
return AOS_ERROR_WRAP(err);

Check warning on line 135 in src/sm/launcher/instance.cpp

View check run for this annotation

Codecov / codecov/patch

src/sm/launcher/instance.cpp#L135

Added line #L135 was not covered by tests
}

return ErrorEnum::eNone;
Expand Down
1 change: 1 addition & 0 deletions tests/include/mocks/monitoringmock.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class ResourceMonitorMock : public ResourceMonitorItf {
public:
MOCK_METHOD(Error, StartInstanceMonitoring,
(const String& instanceID, const InstanceMonitorParams& monitoringConfig), (override));
MOCK_METHOD(Error, UpdateInstanceRunState, (const String& instanceID, InstanceRunState runState), (override));
MOCK_METHOD(Error, StopInstanceMonitoring, (const String& instanceID), (override));
MOCK_METHOD(Error, GetAverageMonitoringData, (NodeMonitoringData & monitoringData), (override));
};
Expand Down