planner(migration): support migrating to a completely different node (#369)

* planner(migration): support migrating to a completely different node if that reduces cross-vm links

* chore: bump code version
csegarragonz authored Feb 2, 2024
1 parent 56c8c45 commit b31abc1
Showing 5 changed files with 90 additions and 20 deletions.
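
For context, the substance of this commit is in how the BinPack scheduler sorts candidate hosts for a DIST_CHANGE (migration) decision: it now treats migration as a fresh scheduling round by freeing the slots the app already occupies, sorting hosts by free slots (best bin-pack), and breaking ties in favour of hosts that already run messages for the app (fewest migrations). Below is a minimal standalone sketch of that comparator; the `Host` struct and `sortForDistChange` helper are illustrative stand-ins, not faabric's actual types:

```cpp
#include <algorithm>
#include <map>
#include <string>
#include <vector>

// Illustrative stand-in for faabric's host map entries (not the real type)
struct Host
{
    std::string ip;
    int slots = 0;
    int usedSlots = 0;
};

int numSlotsAvailable(const Host& h)
{
    return h.slots - h.usedSlots;
}

// Sort hosts in decreasing order of free slots; on a tie, prefer the host
// that already runs more of this app's messages, so fewer messages migrate
void sortForDistChange(std::vector<Host>& hosts,
                       const std::map<std::string, int>& hostFreqCount)
{
    std::sort(hosts.begin(), hosts.end(), [&](const Host& a, const Host& b) {
        int availA = numSlotsAvailable(a);
        int availB = numSlotsAvailable(b);
        if (availA != availB) {
            return availA > availB;
        }

        // Tie: fall back to the app's host-message histogram
        int freqA = hostFreqCount.contains(a.ip) ? hostFreqCount.at(a.ip) : 0;
        int freqB = hostFreqCount.contains(b.ip) ? hostFreqCount.at(b.ip) : 0;
        return freqA > freqB;
    });
}
```

As the removed `std::sort` call in the diff shows, the DIST_CHANGE sort previously used the frequency-first comparator (`isFirstHostLargerWithFreq`), so a host running none of the app's messages could never outrank the hosts the app was already on; sorting by capacity first, after freeing the app's own slots, is what allows migrating to a completely different node when that removes cross-VM links.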
4 changes: 2 additions & 2 deletions .env
@@ -1,4 +1,4 @@
- FAABRIC_VERSION=0.13.1
- FAABRIC_CLI_IMAGE=faasm.azurecr.io/faabric:0.13.1
+ FAABRIC_VERSION=0.14.0
+ FAABRIC_CLI_IMAGE=faasm.azurecr.io/faabric:0.14.0
COMPOSE_PROJECT_NAME=faabric-dev
CONAN_CACHE_MOUNT_SOURCE=./conan-cache/
12 changes: 6 additions & 6 deletions .github/workflows/tests.yml
@@ -20,7 +20,7 @@ jobs:
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
container:
- image: faasm.azurecr.io/faabric:0.13.1
+ image: faasm.azurecr.io/faabric:0.14.0
credentials:
username: ${{ secrets.ACR_SERVICE_PRINCIPAL_ID }}
password: ${{ secrets.ACR_SERVICE_PRINCIPAL_PASSWORD }}
@@ -35,7 +35,7 @@ jobs:
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
container:
- image: faasm.azurecr.io/faabric:0.13.1
+ image: faasm.azurecr.io/faabric:0.14.0
credentials:
username: ${{ secrets.ACR_SERVICE_PRINCIPAL_ID }}
password: ${{ secrets.ACR_SERVICE_PRINCIPAL_PASSWORD }}
@@ -49,7 +49,7 @@ jobs:
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
container:
- image: faasm.azurecr.io/faabric:0.13.1
+ image: faasm.azurecr.io/faabric:0.14.0
credentials:
username: ${{ secrets.ACR_SERVICE_PRINCIPAL_ID }}
password: ${{ secrets.ACR_SERVICE_PRINCIPAL_PASSWORD }}
@@ -72,7 +72,7 @@ jobs:
REDIS_QUEUE_HOST: redis
REDIS_STATE_HOST: redis
container:
- image: faasm.azurecr.io/faabric:0.13.1
+ image: faasm.azurecr.io/faabric:0.14.0
credentials:
username: ${{ secrets.ACR_SERVICE_PRINCIPAL_ID }}
password: ${{ secrets.ACR_SERVICE_PRINCIPAL_PASSWORD }}
@@ -114,7 +114,7 @@ jobs:
REDIS_QUEUE_HOST: redis
REDIS_STATE_HOST: redis
container:
- image: faasm.azurecr.io/faabric:0.13.1
+ image: faasm.azurecr.io/faabric:0.14.0
credentials:
username: ${{ secrets.ACR_SERVICE_PRINCIPAL_ID }}
password: ${{ secrets.ACR_SERVICE_PRINCIPAL_PASSWORD }}
@@ -170,7 +170,7 @@ jobs:
REDIS_QUEUE_HOST: redis
REDIS_STATE_HOST: redis
container:
- image: faasm.azurecr.io/faabric:0.13.1
+ image: faasm.azurecr.io/faabric:0.14.0
credentials:
username: ${{ secrets.ACR_SERVICE_PRINCIPAL_ID }}
password: ${{ secrets.ACR_SERVICE_PRINCIPAL_PASSWORD }}
2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
- 0.13.1
+ 0.14.0
50 changes: 42 additions & 8 deletions src/batch-scheduler/BinPackScheduler.cpp
@@ -162,6 +162,13 @@ std::vector<Host> BinPackScheduler::getSortedHosts(
sortedHosts.push_back(host);
}

+ std::shared_ptr<SchedulingDecision> oldDecision = nullptr;
+ std::map<std::string, int> hostFreqCount;
+ if (decisionType != DecisionType::NEW) {
+     oldDecision = inFlightReqs.at(req->appid()).second;
+     hostFreqCount = getHostFreqCount(oldDecision);
+ }

auto isFirstHostLarger = [&](const Host& hostA, const Host& hostB) -> bool {
// The BinPack scheduler sorts hosts by number of available slots
int nAvailableA = numSlotsAvailable(hostA);
@@ -186,8 +193,6 @@ std::vector<Host> BinPackScheduler::getSortedHosts(
// DIST_CHANGE), the BinPack scheduler takes into consideration the
// existing host-message histogram (i.e. how many messages for this app
// does each host _already_ run)
- auto oldDecision = inFlightReqs.at(req->appid()).second;
- auto hostFreqCount = getHostFreqCount(oldDecision);

int numInHostA = hostFreqCount.contains(getIp(hostA))
? hostFreqCount.at(getIp(hostA))
@@ -209,6 +214,23 @@ std::vector<Host> BinPackScheduler::getSortedHosts(
return isFirstHostLarger(hostA, hostB);
};

+ auto isFirstHostLargerWithFreqTaint = [&](const Host& hostA,
+                                           const Host& hostB) -> bool {
+     // In a DIST_CHANGE decision we want to globally minimise the
+     // number of cross-VM links (i.e. best BIN_PACK), but break ties
+     // with hostFreqCount (i.e. if two hosts have the same number of free
+     // slots, not counting the to-be-migrated app, prefer the host
+     // that is already running messages for this app)
+     int nAvailableA = numSlotsAvailable(hostA);
+     int nAvailableB = numSlotsAvailable(hostB);
+     if (nAvailableA != nAvailableB) {
+         return nAvailableA > nAvailableB;
+     }
+
+     // In case of a tie, use the same criteria as the FREQ count comparator
+     return isFirstHostLargerWithFreq(hostA, hostB);
+ };

switch (decisionType) {
case DecisionType::NEW: {
// For a NEW decision type, the BinPack scheduler just sorts the
@@ -239,18 +261,30 @@ std::vector<Host> BinPackScheduler::getSortedHosts(
// of cross-vm links can be reduced (i.e. we improve locality)
auto oldDecision = inFlightReqs.at(req->appid()).second;
auto hostFreqCount = getHostFreqCount(oldDecision);
- std::sort(sortedHosts.begin(),
-           sortedHosts.end(),
-           isFirstHostLargerWithFreq);

- // Before returning the sorted hosts for dist change, we subtract
- // all slots occupied by the application we want to migrate (note
- // that we want to take into account for the sorting)
+ // Deciding on a migration opportunity is like having another
+ // shot at re-scheduling the app from scratch. Thus, we remove
+ // the slots we currently occupy, and rank the hosts with most
+ // free slots first.
+ // However, in case of a tie, we prefer DIST_CHANGE decisions
+ // that minimise the number of migrations, so we sort hosts in
+ // decreasing order of capacity BUT break ties with frequency.
+ // WARNING: this assumes negligible migration costs
+
+ // First remove the slots the app occupies to have a fresh new
+ // shot at the scheduling
for (auto h : sortedHosts) {
if (hostFreqCount.contains(getIp(h))) {
freeSlots(h, hostFreqCount.at(getIp(h)));
}
}

+ // Now sort the emptied hosts, breaking ties with the freq count
+ // criteria
+ std::sort(sortedHosts.begin(),
+           sortedHosts.end(),
+           isFirstHostLargerWithFreqTaint);

break;
}
default: {
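The effect is easiest to see with the host map from the first new test case below: `foo` and `bar` are full (4/4 slots) with the app's four messages split two-and-two across them, while `baz` is idle. A short walk-through reusing the illustrative `Host` and `sortForDistChange` sketch from above, and assuming `buildHostMap`'s second and third arguments are total and used slots respectively:

```cpp
#include <cstdio>

int main()
{
    // Three 4-slot hosts; the app runs 2 messages on foo and 2 on bar
    std::vector<Host> hosts = { { "foo", 4, 4 },
                                { "bar", 4, 4 },
                                { "baz", 4, 0 } };
    std::map<std::string, int> freq = { { "foo", 2 }, { "bar", 2 } };

    // Free the slots the app occupies: foo and bar drop to 2 free, baz has 4
    for (auto& h : hosts) {
        if (freq.contains(h.ip)) {
            h.usedSlots -= freq.at(h.ip);
        }
    }

    sortForDistChange(hosts, freq);

    // baz now sorts first with 4 free slots, so all 4 messages fit on a
    // single host and the app's cross-VM links drop to zero
    std::printf("winner: %s\n", hosts.front().ip.c_str());
    return 0;
}
```

Under the old frequency-first sort, `baz` (running none of the app's messages) would have sorted last, and the app would have stayed split across `foo` and `bar`.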
42 changes: 39 additions & 3 deletions tests/test/batch-scheduler/test_binpack_scheduler.cpp
@@ -398,8 +398,7 @@ TEST_CASE_METHOD(BinPackSchedulerTestFixture,
buildExpectedDecision(ber, { "foo", "foo", "bar", "foo" });
}

- SECTION(
-   "BinPack prefers hosts running more messages (even if less free slots)")
+ SECTION("BinPack prefers hosts running more slots (even if fewer messages)")
{
config.hostMap = buildHostMap(
{
@@ -414,7 +413,7 @@
config.inFlightReqs =
buildInFlightReqs(ber, 4, { "foo", "foo", "bar", "baz" });
config.expectedDecision =
buildExpectedDecision(ber, { "foo", "foo", "bar", "foo" });
buildExpectedDecision(ber, { "bar", "bar", "bar", "bar" });
}

SECTION("BinPack always prefers consolidating to fewer hosts")
@@ -472,6 +471,43 @@
buildExpectedDecision(ber, { "foo", "bar", "bar", "foo" });
}

SECTION("BinPack will migrate to completely different hosts if necessary")
{
config.hostMap = buildHostMap(
{
"foo",
"bar",
"baz",
},
{ 4, 4, 4 },
{ 4, 4, 0 });
ber = faabric::util::batchExecFactory("bat", "man", 4);
ber->set_type(BatchExecuteRequest_BatchExecuteType_MIGRATION);
config.inFlightReqs =
buildInFlightReqs(ber, 4, { "foo", "foo", "bar", "bar" });
config.expectedDecision =
buildExpectedDecision(ber, { "baz", "baz", "baz", "baz" });
}

SECTION(
"But in case of a tie will prefer minimising the number of migrations")
{
config.hostMap = buildHostMap(
{
"foo",
"bar",
"baz",
},
{ 4, 4, 4 },
{ 0, 4, 2 });
ber = faabric::util::batchExecFactory("bat", "man", 4);
ber->set_type(BatchExecuteRequest_BatchExecuteType_MIGRATION);
config.inFlightReqs =
buildInFlightReqs(ber, 4, { "baz", "baz", "bar", "bar" });
config.expectedDecision =
buildExpectedDecision(ber, { "baz", "baz", "baz", "baz" });
}

SECTION("BinPack will minimise the number of messages to migrate")
{
config.hostMap =
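The second new section above exercises the tie-break. With total slots `{ 4, 4, 4 }` and used slots `{ 0, 4, 2 }`, freeing the app's own slots leaves `foo` and `baz` tied at 4 free, and the frequency criterion picks `baz` because it already runs two of the app's messages, so only `bar`'s two messages migrate. In terms of the illustrative sketch from earlier (a fragment, reusing `Host` and `sortForDistChange`):

```cpp
// Tie-break case: reuses Host and sortForDistChange from the earlier sketch
std::vector<Host> hosts = { { "foo", 4, 0 },
                            { "bar", 4, 4 },
                            { "baz", 4, 2 } };
std::map<std::string, int> freq = { { "bar", 2 }, { "baz", 2 } };

// Free the app's slots: bar drops to 2 used, baz to 0 used
for (auto& h : hosts) {
    if (freq.contains(h.ip)) {
        h.usedSlots -= freq.at(h.ip);
    }
}

sortForDistChange(hosts, freq);
// foo and baz both show 4 free slots, but baz already runs two of the app's
// messages, so it sorts first: the decision { baz, baz, baz, baz } migrates
// only bar's two messages instead of all four
```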
