From e38717d2319e12310a05292d1fa71ed0da24f719 Mon Sep 17 00:00:00 2001
From: Dipin Hora <dipinhora+github@gmail.com>
Date: Sun, 7 Aug 2022 22:08:20 -0400
Subject: [PATCH] Trigger GC for actors when they tell the cycle detector
 they're blocked (#3278)

Prior to this commit, if an actor blocked, it did not run GC to free
any memory it no longer needed. This would result in blocked actors
holding on to (potentially lots of) memory unnecessarily.

This commit causes GC to be triggered when the cycle detector asks
an actor if it is blocked and the actor responds telling the cycle
detector that it is blocked. This should result in memory held by
blocked actors being freed more quickly, even if the cycle detector
doesn't end up detecting a cycle and reaping the actors.

This will force a GC for an actor based on the following three things:

* The actor processed at least one message since its last GC (i.e. it did some work [GC acquire/release message or app message])
* The actor's heap is greater than 0 (i.e. it has memory that could potentially be freed)
* The actor is blocked and is about to tell the cycle detector that it is blocked (i.e. it thinks it has no more work to do at the moment)

The sequence of events for GC'ing when sending a block message to the CD is:

1. actor gets a message from another actor
2. gets rescheduled because it processed an application message
3. next run has an empty queue (and the actor gets marked internally as blocked but doesn't send a block message to the CD)
4. some time passes and the CD eventually asks the actor if it is blocked
5. the actor garbage collects because of this change before sending the block message to the CD (to prevent race conditions)
6. the actor responds to the CD by sending a block message

This shouldn't be a performance hit because step 4 depends on how often the CD runs (not very often). In addition, the CD doesn't ask every actor it knows about whether it is blocked on every run; it asks them in batches instead, so step 4 will not occur very frequently for any actor even if steps 1 - 3 happen regularly.
---
 src/libponyrt/actor/actor.c | 127 ++++++++++++++++++++----------------
 1 file changed, 72 insertions(+), 55 deletions(-)

diff --git a/src/libponyrt/actor/actor.c b/src/libponyrt/actor/actor.c
index 51979432b5..ad73a0e100 100644
--- a/src/libponyrt/actor/actor.c
+++ b/src/libponyrt/actor/actor.c
@@ -241,6 +241,52 @@ static bool well_formed_msg_chain(pony_msg_t* first, pony_msg_t* last)
 }
 #endif
 
+static void try_gc(pony_ctx_t* ctx, pony_actor_t* actor)
+{
+  if(!ponyint_heap_startgc(&actor->heap
+#ifdef USE_RUNTIMESTATS
+  , actor))
+#else
+  ))
+#endif
+    return;
+
+#ifdef USE_RUNTIMESTATS
+    uint64_t used_cpu = ponyint_sched_cpu_used(ctx);
+    ctx->schedulerstats.misc_cpu += used_cpu;
+#endif
+
+  DTRACE1(GC_START, (uintptr_t)ctx->scheduler);
+
+  ponyint_gc_mark(ctx);
+
+  if(actor->type->trace != NULL)
+    actor->type->trace(ctx, actor);
+
+  ponyint_mark_done(ctx);
+
+#ifdef USE_RUNTIMESTATS
+    used_cpu = ponyint_sched_cpu_used(ctx);
+    ctx->schedulerstats.actor_gc_mark_cpu += used_cpu;
+    actor->actorstats.gc_mark_cpu += used_cpu;
+#endif
+
+  ponyint_heap_endgc(&actor->heap
+#ifdef USE_RUNTIMESTATS
+  , actor);
+#else
+  );
+#endif
+
+  DTRACE1(GC_END, (uintptr_t)ctx->scheduler);
+
+#ifdef USE_RUNTIMESTATS
+    used_cpu = ponyint_sched_cpu_used(ctx);
+    ctx->schedulerstats.actor_gc_sweep_cpu += used_cpu;
+    actor->actorstats.gc_sweep_cpu += used_cpu;
+#endif
+}
+
 static void send_unblock(pony_actor_t* actor)
 {
   // Send unblock before continuing.
@@ -248,6 +294,27 @@ static void send_unblock(pony_actor_t* actor)
   ponyint_cycle_unblock(actor);
 }
 
+static void send_block(pony_ctx_t* ctx, pony_actor_t* actor)
+{
+  pony_assert(ctx->current == actor);
+
+  // Try and run GC because we're blocked and sending a block message
+  // to the CD. This will try and free any memory the actor has in its
+  // heap that wouldn't get freed otherwise until the actor is
+  // destroyed or happens to receive more work via application messages
+  // that eventually trigger a GC which may not happen for a long time
+  // (or ever). Do this BEFORE sending the message or else we might be
+  // GCing while the CD destroys us.
+  pony_triggergc(ctx);
+  try_gc(ctx, actor);
+
+
+  // We're blocked, send block message.
+  set_internal_flag(actor, FLAG_BLOCKED_SENT);
+  set_internal_flag(actor, FLAG_CD_CONTACTED);
+  ponyint_cycle_block(actor, &actor->gc);
+}
+
 static bool handle_message(pony_ctx_t* ctx, pony_actor_t* actor,
   pony_msg_t* msg)
 {
@@ -366,9 +433,7 @@ static bool handle_message(pony_ctx_t* ctx, pony_actor_t* actor,
         //
         // Sending multiple "i'm blocked" messages to the cycle detector
         // will result in actor potentially being freed more than once.
-        set_internal_flag(actor, FLAG_BLOCKED_SENT);
-        pony_assert(ctx->current == actor);
-        ponyint_cycle_block(actor, &actor->gc);
+        send_block(ctx, actor);
       }
 
       return false;
@@ -444,52 +509,6 @@ static bool handle_message(pony_ctx_t* ctx, pony_actor_t* actor,
   }
 }
 
-static void try_gc(pony_ctx_t* ctx, pony_actor_t* actor)
-{
-  if(!ponyint_heap_startgc(&actor->heap
-#ifdef USE_RUNTIMESTATS
-  , actor))
-#else
-  ))
-#endif
-    return;
-
-#ifdef USE_RUNTIMESTATS
-    uint64_t used_cpu = ponyint_sched_cpu_used(ctx);
-    ctx->schedulerstats.misc_cpu += used_cpu;
-#endif
-
-  DTRACE1(GC_START, (uintptr_t)ctx->scheduler);
-
-  ponyint_gc_mark(ctx);
-
-  if(actor->type->trace != NULL)
-    actor->type->trace(ctx, actor);
-
-  ponyint_mark_done(ctx);
-
-#ifdef USE_RUNTIMESTATS
-    used_cpu = ponyint_sched_cpu_used(ctx);
-    ctx->schedulerstats.actor_gc_mark_cpu += used_cpu;
-    actor->actorstats.gc_mark_cpu += used_cpu;
-#endif
-
-  ponyint_heap_endgc(&actor->heap
-#ifdef USE_RUNTIMESTATS
-  , actor);
-#else
-  );
-#endif
-
-  DTRACE1(GC_END, (uintptr_t)ctx->scheduler);
-
-#ifdef USE_RUNTIMESTATS
-    used_cpu = ponyint_sched_cpu_used(ctx);
-    ctx->schedulerstats.actor_gc_sweep_cpu += used_cpu;
-    actor->actorstats.gc_sweep_cpu += used_cpu;
-#endif
-}
-
 // return true if mute occurs
 static bool maybe_should_mute(pony_actor_t* actor)
 {
@@ -631,13 +650,12 @@ bool ponyint_actor_run(pony_ctx_t* ctx, pony_actor_t* actor, bool polling)
     )
     {
       // The cycle detector (CD) doesn't know we exist so it won't try
-      // and reach out to us even though we're blocked, so send block message //// and set flag that the CD knows we exist now so that when we block
+      // and reach out to us even though we're blocked, so send block message
+      // and set flag that the CD knows we exist now so that when we block
       // in the future we will wait for the CD to reach out and ask
       // if we're blocked or not.
       // But, only if gc.rc > 0 because if gc.rc == 0 we are a zombie.
-      set_internal_flag(actor, FLAG_BLOCKED_SENT);
-      set_internal_flag(actor, FLAG_CD_CONTACTED);
-      ponyint_cycle_block(actor, &actor->gc);
+      send_block(ctx, actor);
     }
 
   }
@@ -748,8 +766,7 @@ bool ponyint_actor_run(pony_ctx_t* ctx, pony_actor_t* actor, bool polling)
         // unblocked (which would create a race condition) and we've also
         // ensured that the cycle detector will not send this actor any more
         // messages (which would also create a race condition).
-        set_internal_flag(actor, FLAG_BLOCKED_SENT);
-        ponyint_cycle_block(actor, &actor->gc);
+        send_block(ctx, actor);
 
         // mark the queue as empty or else destroy will hang
         bool empty = ponyint_messageq_markempty(&actor->q);