Skip to content

Commit

Permalink
Trigger GC for actors when they tell the cycle detector they're block…
Browse files Browse the repository at this point in the history
…ed (#3278)

Prior to this commit, if an actor blocked, it did not run GC to free
any memory it no longer needed. This would result in blocked actors
holding on to (potentially lots of) memory unnecessarily.

This commit causes GC to be triggered when the cycle detector asks
an actor if it is blocked and the actor responds telling the cycle
detector that it is blocked. This should result in memory held by
blocked actors being freed more quickly, even if the cycle
detector doesn't end up detecting a cycle and reaping the actors.

This will force a GC for an actor based on the following three things:

* The actor processed at least one message since its last GC (i.e. it did some work [GC acquire/release message or app message])
* The actor's heap is greater than 0 (i.e. it has memory that could potentially be freed)
* The actor is blocked and is about to tell the cycle detector that it is blocked (i.e. it thinks it has no more work to do at the moment)

The sequence of events for GC'ing when sending a block message to the CD is:

1. actor gets a message from another actor
2. gets rescheduled because it processed an application message
3. next run has an empty queue (and the actor gets marked internally as blocked but doesn't send a block message to the CD)
4. some time passes and the CD eventually asks the actor if it is blocked
5. the actor garbage collects because of this change before sending the block message to the CD (to prevent race conditions)
6. the actor responds to the CD by sending a block message

This shouldn't be a performance hit because step 4 depends on how often the CD runs (not very often). In addition, the CD doesn't ask every actor it knows about whether it is blocked on each run; it asks them in batches instead, so step 4 will not occur very frequently for any actor even if steps 1 - 3 happen regularly.
  • Loading branch information
dipinhora authored Aug 8, 2022
1 parent 83ef004 commit e38717d
Showing 1 changed file with 72 additions and 55 deletions.
127 changes: 72 additions & 55 deletions src/libponyrt/actor/actor.c
Original file line number Diff line number Diff line change
Expand Up @@ -241,13 +241,80 @@ static bool well_formed_msg_chain(pony_msg_t* first, pony_msg_t* last)
}
#endif

// Attempt a garbage collection pass over `actor`'s heap.
//
// ctx:   scheduler context handed to the mark/trace routines; also the
//        destination for scheduler-level stats under USE_RUNTIMESTATS.
// actor: the actor whose heap is collected.
//
// Returns immediately, doing nothing else, when ponyint_heap_startgc
// reports that a collection should not start.
static void try_gc(pony_ctx_t* ctx, pony_actor_t* actor)
{
  // ponyint_heap_startgc takes the actor as an extra argument only in
  // runtime-stats builds, so the whole guard is selected per build.
#ifdef USE_RUNTIMESTATS
  if(!ponyint_heap_startgc(&actor->heap, actor))
    return;

  // Whatever ponyint_sched_cpu_used reports here is booked as misc CPU.
  uint64_t cpu_delta = ponyint_sched_cpu_used(ctx);
  ctx->schedulerstats.misc_cpu += cpu_delta;
#else
  if(!ponyint_heap_startgc(&actor->heap))
    return;
#endif

  DTRACE1(GC_START, (uintptr_t)ctx->scheduler);

  // Mark phase: begin marking, run the actor type's trace function when it
  // has one, then finish marking.
  ponyint_gc_mark(ctx);

  if(actor->type->trace != NULL)
    actor->type->trace(ctx, actor);

  ponyint_mark_done(ctx);

#ifdef USE_RUNTIMESTATS
  // Book the CPU reported after marking against the mark counters of both
  // the scheduler and the actor, then finish the heap collection.
  cpu_delta = ponyint_sched_cpu_used(ctx);
  ctx->schedulerstats.actor_gc_mark_cpu += cpu_delta;
  actor->actorstats.gc_mark_cpu += cpu_delta;

  ponyint_heap_endgc(&actor->heap, actor);
#else
  ponyint_heap_endgc(&actor->heap);
#endif

  DTRACE1(GC_END, (uintptr_t)ctx->scheduler);

#ifdef USE_RUNTIMESTATS
  // Book the CPU reported after ponyint_heap_endgc against the sweep
  // counters of both the scheduler and the actor.
  cpu_delta = ponyint_sched_cpu_used(ctx);
  ctx->schedulerstats.actor_gc_sweep_cpu += cpu_delta;
  actor->actorstats.gc_sweep_cpu += cpu_delta;
#endif
}

// Mark `actor` as no longer blocked and send the unblock.
//
// Clears both FLAG_BLOCKED and FLAG_BLOCKED_SENT on the actor and then
// calls ponyint_cycle_unblock for it.
static void send_unblock(pony_actor_t* actor)
{
// Clear the blocked flags, then send unblock before continuing.
unset_internal_flag(actor, FLAG_BLOCKED | FLAG_BLOCKED_SENT);
ponyint_cycle_unblock(actor);
}

// Send a block message for `actor` to the cycle detector (CD), attempting a
// GC pass first.
//
// ctx:   the current context; ctx->current is asserted to be `actor`.
// actor: the blocked actor on whose behalf the block message is sent.
//
// After the GC attempt, sets FLAG_BLOCKED_SENT and FLAG_CD_CONTACTED on the
// actor and calls ponyint_cycle_block with the actor's gc state.
static void send_block(pony_ctx_t* ctx, pony_actor_t* actor)
{
pony_assert(ctx->current == actor);

// Try and run GC because we're blocked and sending a block message
// to the CD. This will try and free any memory the actor has in its
// heap that wouldn't get freed otherwise until the actor is
// destroyed or happens to receive more work via application messages
// that eventually trigger a GC which may not happen for a long time
// (or ever). Do this BEFORE sending the message or else we might be
// GCing while the CD destroys us.
// NOTE(review): pony_triggergc presumably makes the following try_gc
// eligible to start a collection -- confirm against its definition.
pony_triggergc(ctx);
try_gc(ctx, actor);


// We're blocked, send block message. Both flags are set before the
// message is handed to ponyint_cycle_block.
set_internal_flag(actor, FLAG_BLOCKED_SENT);
set_internal_flag(actor, FLAG_CD_CONTACTED);
ponyint_cycle_block(actor, &actor->gc);
}

static bool handle_message(pony_ctx_t* ctx, pony_actor_t* actor,
pony_msg_t* msg)
{
Expand Down Expand Up @@ -366,9 +433,7 @@ static bool handle_message(pony_ctx_t* ctx, pony_actor_t* actor,
//
// Sending multiple "i'm blocked" messages to the cycle detector
// will result in actor potentially being freed more than once.
set_internal_flag(actor, FLAG_BLOCKED_SENT);
pony_assert(ctx->current == actor);
ponyint_cycle_block(actor, &actor->gc);
send_block(ctx, actor);
}

return false;
Expand Down Expand Up @@ -444,52 +509,6 @@ static bool handle_message(pony_ctx_t* ctx, pony_actor_t* actor,
}
}

// Attempt a garbage collection pass over `actor`'s heap.
//
// ctx:   scheduler context handed to the mark/trace routines; also the
//        destination for scheduler-level stats under USE_RUNTIMESTATS.
// actor: the actor whose heap is collected.
//
// Returns immediately, doing nothing else, when ponyint_heap_startgc
// reports that a collection should not start.
static void try_gc(pony_ctx_t* ctx, pony_actor_t* actor)
{
  // ponyint_heap_startgc takes the actor as an extra argument only in
  // runtime-stats builds, so the whole guard is selected per build.
#ifdef USE_RUNTIMESTATS
  if(!ponyint_heap_startgc(&actor->heap, actor))
    return;

  // Whatever ponyint_sched_cpu_used reports here is booked as misc CPU.
  uint64_t cpu_delta = ponyint_sched_cpu_used(ctx);
  ctx->schedulerstats.misc_cpu += cpu_delta;
#else
  if(!ponyint_heap_startgc(&actor->heap))
    return;
#endif

  DTRACE1(GC_START, (uintptr_t)ctx->scheduler);

  // Mark phase: begin marking, run the actor type's trace function when it
  // has one, then finish marking.
  ponyint_gc_mark(ctx);

  if(actor->type->trace != NULL)
    actor->type->trace(ctx, actor);

  ponyint_mark_done(ctx);

#ifdef USE_RUNTIMESTATS
  // Book the CPU reported after marking against the mark counters of both
  // the scheduler and the actor, then finish the heap collection.
  cpu_delta = ponyint_sched_cpu_used(ctx);
  ctx->schedulerstats.actor_gc_mark_cpu += cpu_delta;
  actor->actorstats.gc_mark_cpu += cpu_delta;

  ponyint_heap_endgc(&actor->heap, actor);
#else
  ponyint_heap_endgc(&actor->heap);
#endif

  DTRACE1(GC_END, (uintptr_t)ctx->scheduler);

#ifdef USE_RUNTIMESTATS
  // Book the CPU reported after ponyint_heap_endgc against the sweep
  // counters of both the scheduler and the actor.
  cpu_delta = ponyint_sched_cpu_used(ctx);
  ctx->schedulerstats.actor_gc_sweep_cpu += cpu_delta;
  actor->actorstats.gc_sweep_cpu += cpu_delta;
#endif
}

// return true if mute occurs
static bool maybe_should_mute(pony_actor_t* actor)
{
Expand Down Expand Up @@ -631,13 +650,12 @@ bool ponyint_actor_run(pony_ctx_t* ctx, pony_actor_t* actor, bool polling)
)
{
// The cycle detector (CD) doesn't know we exist so it won't try
// and reach out to us even though we're blocked, so send block message //// and set flag that the CD knows we exist now so that when we block
// and reach out to us even though we're blocked, so send block message
// and set flag that the CD knows we exist now so that when we block
// in the future we will wait for the CD to reach out and ask
// if we're blocked or not.
// But, only if gc.rc > 0 because if gc.rc == 0 we are a zombie.
set_internal_flag(actor, FLAG_BLOCKED_SENT);
set_internal_flag(actor, FLAG_CD_CONTACTED);
ponyint_cycle_block(actor, &actor->gc);
send_block(ctx, actor);
}

}
Expand Down Expand Up @@ -748,8 +766,7 @@ bool ponyint_actor_run(pony_ctx_t* ctx, pony_actor_t* actor, bool polling)
// unblocked (which would create a race condition) and we've also
// ensured that the cycle detector will not send this actor any more
// messages (which would also create a race condition).
set_internal_flag(actor, FLAG_BLOCKED_SENT);
ponyint_cycle_block(actor, &actor->gc);
send_block(ctx, actor);

// mark the queue as empty or else destroy will hang
bool empty = ponyint_messageq_markempty(&actor->q);
Expand Down

0 comments on commit e38717d

Please sign in to comment.