Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve AcAC to ActorMonitoring #11584

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 5 additions & 10 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ project(foundationdb

list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")

message (STATUS "${PROJECT_SOURCE_DIR} ${PROJECT_BINARY_DIR}")
message(STATUS "Source code directory: ${PROJECT_SOURCE_DIR}")
message(STATUS "Build directory: ${PROJECT_BINARY_DIR}")
if("${PROJECT_SOURCE_DIR}" STREQUAL "${PROJECT_BINARY_DIR}")
message(FATAL_ERROR "In-source builds are forbidden")
endif()
Expand All @@ -53,15 +54,6 @@ endif()
set(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin)
set(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib)

option(WITH_ACAC "Enable actor stack recording" OFF)
if (WITH_ACAC)
message(STATUS "Build FoundationDB with AcAC support")
if (FDB_RELEASE OR FDB_RELEASE_CANDIDATE)
message(FATAL_ERROR "ACAC will cause severe slowdown of the system and SHOULD not be enabled in Release.")
endif()
add_compile_definitions(WITH_ACAC)
endif()

################################################################################
# Packages used for bindings
################################################################################
Expand Down Expand Up @@ -129,6 +121,9 @@ set(FDB_PREV3_RELEASE_VERSION "7.0.0")
# Flow
################################################################################

# Flags for Actor Monitoring
include(AM)

include(utils)

# Flow and other tools are written in C# - so we need that dependency
Expand Down
20 changes: 20 additions & 0 deletions cmake/AM.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Actor Monitor

# Advanced Materials is a peer-reviewed journal covering material topics. Its impact factor is 29.4(2022)
Copy link
Collaborator

@spraza spraza Aug 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's this?


# ACTOR_MONITORING selects how much actor bookkeeping is compiled in. The chosen
# level is exposed to the sources as the ACTOR_MONITORING preprocessor value:
#   DISABLED -> 0, MINIMAL -> 1, FULL -> 2
set(ACTOR_MONITORING DISABLED CACHE STRING "Actor monitor")
set_property(CACHE ACTOR_MONITORING PROPERTY STRINGS DISABLED MINIMAL FULL)

# Actor monitoring is a debugging aid; refuse to configure a release build with it on.
if ((FDB_RELEASE OR FDB_RELEASE_CANDIDATE) AND NOT (ACTOR_MONITORING STREQUAL "DISABLED"))
  message(FATAL_ERROR "AM will cause more than 10% slowdown and should not be used in release")
endif ()

# add_compile_definitions() expects bare NAME=value items. A leading -D is only
# stripped by CMake >= 3.26, so it must not be passed here.
if (ACTOR_MONITORING STREQUAL "DISABLED")
  add_compile_definitions(ACTOR_MONITORING=0)
elseif (ACTOR_MONITORING STREQUAL "MINIMAL")
  add_compile_definitions(ACTOR_MONITORING=1)
elseif (ACTOR_MONITORING STREQUAL "FULL")
  add_compile_definitions(ACTOR_MONITORING=2)
else ()
  # The STRINGS property only drives GUI drop-downs; it does not validate -D input.
  message(FATAL_ERROR "Invalid ACTOR_MONITORING value '${ACTOR_MONITORING}'; expected DISABLED, MINIMAL or FULL")
endif ()

message(STATUS "ACTOR monitoring level is ${ACTOR_MONITORING}")
228 changes: 116 additions & 112 deletions flow/ActorContext.cpp
Original file line number Diff line number Diff line change
@@ -1,21 +1,51 @@
/*
* ActorContext.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2024 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "flow/ActorContext.h"
#include "flow/ActorUID.h"

#ifdef WITH_ACAC
#if ACTOR_MONITORING != ACTOR_MONITORING_DISABLED

#include <iomanip>
#include <iostream>
#include <mutex>
#include <optional>
#include <sstream>
#include <string>

#include "flow/flow.h"
#include "libb64/encode.h"
#include "libb64/decode.h"

#include "flow/ActorUID.h"
#include "flow/flow.h"
#include "flow/GUID.h"

namespace ActorMonitoring {

namespace {

std::vector<ActorExecutionContext> g_currentExecutionContext;

std::unordered_map<ActorID, ActiveActor> g_activeActors;

ActorID getActorID() {
static thread_local ActorID actorID = INIT_ACTOR_ID;
inline ActorID getActorID() {
static thread_local ActorID actorID = ActorMonitoring::INIT_ACTOR_ID;
return ++actorID;
}

Expand All @@ -33,23 +63,38 @@ inline bool isActorOnMainThread() {
// is never called, the N2::thread_network will always be nullptr. In this case, Sim2::isOnMainThread will always
// return false and not reliable.
if (g_network) [[likely]] {
return g_network->isSimulated() ? true : g_network->isOnMainThread();
return g_network->isSimulated() || g_network->isOnMainThread();
} else {
return false;
}
}

// Returns the current time reported by g_network, or 0.0 when g_network is null.
inline double gn_now() {
	if (g_network != nullptr) [[likely]] {
		return g_network->now();
	}
	return 0.0;
}

} // anonymous namespace

using ActiveActorsCount_t = uint32_t;
// Default constructor: a default-constructed identifier, with both the actor ID and the
// spawner ID set to INVALID_ACTOR_ID.
ActorInfoMinimal::ActorInfoMinimal() : identifier(ActorIdentifier()), id(INVALID_ACTOR_ID), spawner(INVALID_ACTOR_ID) {}

ActiveActor::ActiveActor() : identifier(), id(), spawnTime(0.0), spawner(INVALID_ACTOR_ID) {}
// Constructs the minimal record from its three fields.
//   identifier_ - copied into `identifier`
//   id_         - copied into `id`
//   spawner_    - copied into `spawner`
ActorInfoMinimal::ActorInfoMinimal(const ActorIdentifier& identifier_, const ActorID id_, const ActorID spawner_)
: identifier(identifier_), id(id_), spawner(spawner_) {}

// Default constructor: default ActorInfoMinimal base, all three timestamps set to -1 and
// a resume count of 0.
// NOTE(review): -1 presumably marks a timestamp that has not been recorded yet - confirm.
ActorInfoFull::ActorInfoFull()
: ActorInfoMinimal(), spawnTime(-1), lastResumeTime(-1), lastYieldTime(-1), numResumes(0) {}

// Constructs the full record for an actor: forwards identifier_, id_ and spawner_ to the
// ActorInfoMinimal base and sets all three timestamps to -1.
// NOTE(review): numResumes starts at -1 here but at 0 in the default constructor. This may be
// deliberate so that the first execution is not counted as a resume - confirm, and also check
// the declared type of numResumes, since -1 would wrap if it is unsigned.
ActorInfoFull::ActorInfoFull(const ActorIdentifier& identifier_, const ActorID id_, const ActorID spawner_)
: ActorInfoMinimal(identifier_, id_, spawner_), spawnTime(-1), lastResumeTime(-1), lastYieldTime(-1), numResumes(-1) {
}

ActiveActor::ActiveActor(const ActorIdentifier& identifier_, const ActorID& id_, const ActorID& spawnerID_)
: identifier(identifier_), id(id_), spawnTime(g_network != nullptr ? g_network->now() : 0.0), spawner(spawnerID_) {}
using ActiveActorsCount_t = uint32_t;

ActiveActorHelper::ActiveActorHelper(const ActorIdentifier& actorIdentifier) {
if (!isActorOnMainThread()) [[unlikely]] {
// Only capture ACTORs on the main thread
return;
}
const auto actorID_ = getActorID();
Expand All @@ -71,6 +116,10 @@ ActorExecutionContextHelper::ActorExecutionContextHelper(const ActorID& actorID_
return;
}
g_currentExecutionContext.emplace_back(actorID_, blockIdentifier_);
#if ACTOR_MONITORING == ACTOR_MONITORING_FULL
g_activeActors[actorID_].lastResumeTime = gn_now();
++g_activeActors[actorID_].numResumes;
#endif
}

ActorExecutionContextHelper::~ActorExecutionContextHelper() {
Expand All @@ -84,124 +133,79 @@ ActorExecutionContextHelper::~ActorExecutionContextHelper() {
g_currentExecutionContext.pop_back();
}

// TODO: Rewrite this function for better display
void dumpActors(std::ostream& stream) {
stream << "Current active ACTORs:" << std::endl;
for (const auto& [actorID, activeActor] : g_activeActors) {
stream << std::setw(10) << actorID << " " << activeActor.identifier.toString() << std::endl;
if (activeActor.spawner != INVALID_ACTOR_ID) {
stream << " Spawn by " << std::setw(10) << activeActor.spawner << std::endl;
}
// With ACTOR_MONITORING_FULL, stamps the g_activeActors entry of actorID_ with the current
// time and the block it yielded at. Does nothing when not on the main thread, and compiles
// to an empty body at other monitoring levels.
ActorYieldHelper::ActorYieldHelper(const ActorID& actorID_, const ActorBlockIdentifier& blockIdentifier_) {
#if ACTOR_MONITORING == ACTOR_MONITORING_FULL
	if (isActorOnMainThread()) [[likely]] {
		// Read the clock before touching the map, then update the entry through one lookup.
		const auto yieldTime = gn_now();
		auto& actorRecord = g_activeActors[actorID_];
		actorRecord.lastYieldTime = yieldTime;
		actorRecord.yieldBlockID = blockIdentifier_;
	}
#endif
}

namespace {

std::vector<ActiveActor> getCallBacktraceOfActor(const ActorID& actorID) {
std::vector<ActiveActor> actorBacktrace;
auto currentActorID = actorID;
for (;;) {
if (currentActorID == INIT_ACTOR_ID) {
// Reaching the root
break;
}
if (g_activeActors.count(currentActorID) == 0) {
// TODO: Understand why this happens and react properly
break;
}
actorBacktrace.push_back(g_activeActors.at(currentActorID));
if (g_activeActors.at(currentActorID).spawner != INVALID_ACTOR_ID) {
currentActorID = g_activeActors.at(currentActorID).spawner;
} else {
// TODO: Understand why the actor has no spawner ID
break;
}
}
return actorBacktrace;
// Appends BINARY_GUID to the given writer.
void encodeBinaryGUID(BinaryWriter& writer) {
writer << BINARY_GUID;
}

} // anonymous namespace

void dumpActorCallBacktrace() {
std::string backtrace = encodeActorContext(ActorContextDumpType::CURRENT_CALL_BACKTRACE);
std::cout << backtrace << std::endl;
}
} // namespace

std::string encodeActorContext(const ActorContextDumpType dumpType) {
BinaryWriter writer(Unversioned());
auto writeActorInfo = [&writer](const ActiveActor& actor) {
writer << actor.id << actor.identifier << actor.spawner;
};

writer << static_cast<uint8_t>(dumpType)
<< (g_currentExecutionContext.empty() ? INVALID_ACTOR_ID : g_currentExecutionContext.back().actorID);

switch (dumpType) {
case ActorContextDumpType::FULL_CONTEXT:
writer << static_cast<ActiveActorsCount_t>(g_activeActors.size());
for (const auto& [actorID, activeActor] : g_activeActors) {
writeActorInfo(activeActor);
}
break;
case ActorContextDumpType::CURRENT_STACK:
// Only current call stack
{
if (g_currentExecutionContext.empty()) {
writer << static_cast<ActiveActorsCount_t>(0);
break;
}
writer << static_cast<ActiveActorsCount_t>(g_currentExecutionContext.size());
for (const auto& context : g_currentExecutionContext) {
writeActorInfo(g_activeActors.at(context.actorID));
}
}
break;
case ActorContextDumpType::CURRENT_CALL_BACKTRACE:
// The call backtrace of current active actor
{
if (g_currentExecutionContext.empty()) {
writer << static_cast<ActiveActorsCount_t>(0);
break;
}
const auto actors = getCallBacktraceOfActor(g_currentExecutionContext.back().actorID);
writer << static_cast<ActiveActorsCount_t>(actors.size());
for (const auto& item : actors) {
writeActorInfo(item);
}
}
break;
default:
UNREACHABLE();
}

const std::string data = writer.toValue().toString();
return base64::encoder::from_string(data);
encodeBinaryGUID(writer);

return "";
}

DecodedActorContext decodeActorContext(const std::string& caller) {
DecodedActorContext result;
const auto decoded = base64::decoder::from_string(caller);
BinaryReader reader(decoded, Unversioned());

std::underlying_type_t<ActorContextDumpType> dumpTypeRaw;
reader >> dumpTypeRaw;
result.dumpType = static_cast<ActorContextDumpType>(dumpTypeRaw);

reader >> result.currentRunningActor;

ActiveActorsCount_t actorCount;
reader >> actorCount;

std::unordered_map<ActorID, std::tuple<ActorID, ActorIdentifier, ActorID>> actors;
for (ActiveActorsCount_t i = 0; i < actorCount; ++i) {
ActorID id;
ActorID spawner;
ActorIdentifier identifier;
reader >> id >> identifier >> spawner;
result.context.emplace_back(id, identifier, spawner);
return DecodedActorContext();
}

namespace {

// Looks up actorID in g_activeActors. Returns a copy of the stored record, or an empty
// optional when the ID is not present.
auto getActorInfoFromActorID(const ActorID& actorID) -> std::optional<ActorInfo> {
	const auto found = g_activeActors.find(actorID);
	if (found == std::end(g_activeActors)) {
		return std::nullopt;
	}
	return std::optional<ActorInfo>(std::in_place, found->second);
}

// Looks up actorIdentifier in ACTOR_DEBUGGING_DATA. Returns a copy of the stored debugging
// data, or an empty optional when the identifier is not present.
auto getActorDebuggingDataFromIdentifier(const ActorIdentifier& actorIdentifier) -> std::optional<ActorDebuggingData> {
	const auto found = ACTOR_DEBUGGING_DATA.find(actorIdentifier);
	if (found == std::end(ACTOR_DEBUGGING_DATA)) {
		return std::nullopt;
	}
	return std::optional<ActorDebuggingData>(std::in_place, found->second);
}

#endif // WITH_ACAC
} // namespace

// Prints the current ACTOR execution stack to std::cout: three summary counters, then one
// line per entry of g_currentExecutionContext with the actor ID and, when both the actor
// record and its debugging data can be found, the actor name and source location.
void dumpActorCallBacktrace() {
	std::cout << "Length of ACTOR stack: " << g_currentExecutionContext.size() << std::endl;
	std::cout << "NumActors=" << g_activeActors.size() << std::endl;
	std::cout << "NumDebugDatas=" << ACTOR_DEBUGGING_DATA.size() << std::endl;

	for (const auto& frame : g_currentExecutionContext) {
		std::cout << std::setw(10) << frame.actorID << "\t";

		const auto actorInfo = getActorInfoFromActorID(frame.actorID);
		if (!actorInfo.has_value()) {
			std::cout << "No ACTOR info" << std::endl;
			continue;
		}

		const auto debuggingData = getActorDebuggingDataFromIdentifier(actorInfo->identifier);
		if (!debuggingData.has_value()) {
			std::cout << "No debug data available" << std::endl;
			continue;
		}

		std::cout << std::setw(30) << debuggingData->actorName << "\t" << debuggingData->path << ":"
		          << debuggingData->lineNumber << std::endl;
	}
}

} // namespace ActorMonitoring

#endif
27 changes: 27 additions & 0 deletions flow/ActorUID.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
* FoundationDB ACTOR UID data
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2024 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the 'License');
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an 'AS IS' BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* Do not include this file directly.
*/

#include "flow/ActorUID.h"

#define __ACTOR_UID_DATA_H_INCLUDE
// The data is generated after all actors are compiled
#include "flow/ActorUIDData.h"
#undef __ACTOR_UID_DATA_H_INCLUDE
Loading