Skip to content

Commit

Permalink
Add example docs for classloader workers (#3796)
Browse files Browse the repository at this point in the history
Fixes #3794

It's still pretty messy, but i'm just documenting the existing APIs that
already exist, and at least it provides a paved path for people to get
_something_ working. Cleaning them up can come separately in
#3775
  • Loading branch information
lihaoyi authored Oct 21, 2024
1 parent 4a074b1 commit c0b6f18
Show file tree
Hide file tree
Showing 11 changed files with 148 additions and 5 deletions.
6 changes: 5 additions & 1 deletion docs/modules/ROOT/pages/extending/running-jvm-code.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,8 @@ include::partial$example/extending/jvmcode/1-subprocess.adoc[]

== In-process Isolated Classloaders

include::partial$example/extending/jvmcode/2-inprocess.adoc[]
include::partial$example/extending/jvmcode/2-classloader.adoc[]

== Classloader Worker Tasks

include::partial$example/extending/jvmcode/3-worker.adoc[]
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ object foo extends JavaModule {
def groovyScript = Task.Source(millSourcePath / "generate.groovy")

def groovyGeneratedResources = Task{
Jvm.runInprocess(classPath = groovyClasspath().map(_.path)){ classLoader =>
Jvm.runClassloader(classPath = groovyClasspath().map(_.path)){ classLoader =>
classLoader
.loadClass("groovy.ui.GroovyMain")
.getMethod("main", classOf[Array[String]])
Expand All @@ -34,7 +34,7 @@ object foo extends JavaModule {
def resources = super.resources() ++ Seq(groovyGeneratedResources())
}

// Note that unlike `Jvm.runSubprocess`, `Jvm.runInprocess` does not take a `workingDir`
// Note that unlike `Jvm.runSubprocess`, `Jvm.runClassloader` does not take a `workingDir`
// or `mainArgs`: it instead provides you an in-memory `classLoader` that contains the
// classpath you gave it. From there, you can use `.loadClass` and `.getMethod` to fish out
// the classes and methods you want, and `.invoke` to call them.
Expand All @@ -45,7 +45,7 @@ object foo extends JavaModule {
Contents of groovy-generated.html is <html><body><h1>Hello!</h1><p>Groovy!</p></body></html>
*/

// `Jvm.runInprocess` has significantly less overhead than `Jvm.runSubprocess`: both in terms
// `Jvm.runClassloader` has significantly less overhead than `Jvm.runSubprocess`: both in terms
// of wall-clock time and in terms of memory footprint. However, it does have somewhat less
// isolation, as the code is running inside your JVM and cannot be configured to have a separate
// working directory, environment variables, and other process-global configs. Which one is
Expand Down
4 changes: 4 additions & 0 deletions example/extending/jvmcode/3-worker/bar/generate.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Script arguments: args[0] is the text to embed in the page, args[1] is the
// path of the HTML file to write.
def message = args[0]
def targetPath = args[1]

// Wrap the message in a minimal HTML page and write it to the target file
def page = "<html><body><h1>Hello!</h1><p>" + message + "</p></body></html>"
new File(targetPath).write(page)
20 changes: 20 additions & 0 deletions example/extending/jvmcode/3-worker/bar/src/Bar.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package bar;

import java.io.IOException;
import java.io.InputStream;

public class Bar {

  /**
   * Reads the `groovy-generated.html` resource from the classpath and returns its contents.
   *
   * The bytes are decoded using the JVM's default charset.
   *
   * @return the text content of `groovy-generated.html`
   * @throws IOException if the resource is not on the classpath or cannot be read
   */
  public static String groovyGeneratedHtml() throws IOException {
    // Get the resource as an InputStream
    try (InputStream inputStream = Bar.class.getClassLoader().getResourceAsStream("groovy-generated.html")) {
      // `getResourceAsStream` signals a missing resource by returning null rather than
      // throwing, so fail with a descriptive error instead of a NullPointerException
      if (inputStream == null) {
        throw new IOException("Resource groovy-generated.html not found on the classpath");
      }
      return new String(inputStream.readAllBytes());
    }
  }

  /** Prints the contents of the `groovy-generated.html` classpath resource to stdout. */
  public static void main(String[] args) throws IOException{
    String appClasspathResourceText = Bar.groovyGeneratedHtml();
    System.out.println("Contents of groovy-generated.html is " + appClasspathResourceText);
  }
}
85 changes: 85 additions & 0 deletions example/extending/jvmcode/3-worker/build.mill
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// Although running JVM bytecode via a one-off isolated classloader has less overhead
// than running it in a subprocess, the fact that the classloader needs to be created
// each time adds overhead: newly-created classloaders contain code that is not yet
// optimized by the JVM. When performance matters, you can put the classloader in a
// `Task.Worker` to keep it around, allowing the code internally to be optimized and
// stay optimized without being thrown away each time

// This example is similar to the earlier examples running the Groovy interpreter, but
// instead of using `Jvm.runSubprocess` or `Jvm.runClassloader` we use
// `mill.api.ClassLoader.create` inside a `Task.Worker` to load the Groovy interpreter
// classpath files into a long-lived in-memory in-process classloader:

package build
import mill._, javalib._
import mill.util.Jvm

// Standalone `CoursierModule` whose `defaultResolver()` is used by `groovyClasspath`
// to resolve the Groovy dependency
object coursierModule extends CoursierModule

// Resolves the Groovy 3.0.9 artifact through `coursierModule`'s default resolver
def groovyClasspath: Task[Agg[PathRef]] = Task {
  val groovyDep = ivy"org.codehaus.groovy:groovy:3.0.9"
  coursierModule.defaultResolver().resolveDeps(Agg(groovyDep))
}

// Long-lived classloader containing the Groovy classpath, created once inside a
// `Task.Worker` and re-used by every task that needs to run Groovy code.
// `parent = null` is passed through to `mill.api.ClassLoader.create` as in the
// original example.
def groovyWorker: Worker[java.net.URLClassLoader] = Task.Worker {
  // Go through `toURI` because `java.io.File.toURL` is deprecated: it does not
  // escape characters that are illegal in URLs (e.g. spaces in a path)
  val classpathUrls = groovyClasspath().map(_.path.toIO.toURI.toURL).toSeq
  mill.api.ClassLoader.create(classpathUrls, parent = null)
}

// A `JavaModule` that runs its `generate.groovy` script through the shared
// `groovyWorker` classloader and adds the output folder to its resources.
trait GroovyGenerateJavaModule extends JavaModule {
  // The per-module Groovy script to evaluate
  def groovyScript = Task.Source(millSourcePath / "generate.groovy")

  // Invokes `groovy.ui.GroovyMain.main` reflectively with the script path, the
  // module-specific argument and the output file path, then returns the task's
  // destination folder containing `groovy-generated.html`
  def groovyGeneratedResources = Task {
    val thread = Thread.currentThread()
    val previousLoader = thread.getContextClassLoader
    val groovyLoader = groovyWorker()
    // The context classloader is swapped for the duration of the call and
    // always restored afterwards
    thread.setContextClassLoader(groovyLoader)
    try {
      val groovyMain = groovyLoader
        .loadClass("groovy.ui.GroovyMain")
        .getMethod("main", classOf[Array[String]])
      val scriptArgs = Array[String](
        groovyScript().path.toString,
        groovyGenerateArg(),
        (Task.dest / "groovy-generated.html").toString
      )
      // `main` is static, hence the `null` receiver
      groovyMain.invoke(null, scriptArgs)
    } finally thread.setContextClassLoader(previousLoader)
    PathRef(Task.dest)
  }

  // Text handed to the script as its first argument; supplied by each concrete module
  def groovyGenerateArg: T[String]
  def resources = super.resources() ++ Seq(groovyGeneratedResources())
}

// Module whose `generate.groovy` script receives "Foo Groovy!" as its first argument
object foo extends GroovyGenerateJavaModule{
def groovyGenerateArg = "Foo Groovy!"
}
// Module whose `generate.groovy` script receives "Bar Groovy!" as its first argument
object bar extends GroovyGenerateJavaModule{
def groovyGenerateArg = "Bar Groovy!"
}

// Here we have two modules `foo` and `bar`, each of which makes use of `groovyWorker`
// to evaluate a groovy script to generate some resources. In this case, we invoke the `main`
// method of `groovy.ui.GroovyMain`, which also happens to require us to set the
// `ContextClassLoader` to work.


/** Usage

> ./mill foo.run
Contents of groovy-generated.html is <html><body><h1>Hello!</h1><p>Foo Groovy!</p></body></html>

> ./mill bar.run
Contents of groovy-generated.html is <html><body><h1>Hello!</h1><p>Bar Groovy!</p></body></html>
*/


// Because the `URLClassLoader` within `groovyWorker` is long-lived, the code within the
// classloader can be optimized by the JVM runtime, and would have less overhead than if
// run in separate classloaders via `Jvm.runClassloader`. And because `URLClassLoader`
// already extends `AutoCloseable`, `groovyWorker` gets treated as an
// xref:fundamentals/tasks.adoc#_autoclosable_workers[Autocloseable Worker] automatically.

// NOTE: As mentioned in documentation for xref:fundamentals/tasks.adoc#_workers[Worker Tasks],
// the classloader contained within `groovyWorker` above is *initialized* in a single-thread,
// but it may be *used* concurrently in a multi-threaded environment. Practically, that means
// that the classes and methods you are invoking within the classloader must not make use of
// un-synchronized global mutable variables.
4 changes: 4 additions & 0 deletions example/extending/jvmcode/3-worker/foo/generate.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Script arguments: args[0] is the text to embed in the page, args[1] is the
// path of the HTML file to write.
def message = args[0]
def targetPath = args[1]

// Wrap the message in a minimal HTML page and write it to the target file
def page = "<html><body><h1>Hello!</h1><p>" + message + "</p></body></html>"
new File(targetPath).write(page)
20 changes: 20 additions & 0 deletions example/extending/jvmcode/3-worker/foo/src/Foo.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package foo;

import java.io.IOException;
import java.io.InputStream;

public class Foo {

  /**
   * Reads the `groovy-generated.html` resource from the classpath and returns its contents.
   *
   * The bytes are decoded using the JVM's default charset.
   *
   * @return the text content of `groovy-generated.html`
   * @throws IOException if the resource is not on the classpath or cannot be read
   */
  public static String groovyGeneratedHtml() throws IOException {
    // Get the resource as an InputStream
    try (InputStream inputStream = Foo.class.getClassLoader().getResourceAsStream("groovy-generated.html")) {
      // `getResourceAsStream` signals a missing resource by returning null rather than
      // throwing, so fail with a descriptive error instead of a NullPointerException
      if (inputStream == null) {
        throw new IOException("Resource groovy-generated.html not found on the classpath");
      }
      return new String(inputStream.readAllBytes());
    }
  }

  /** Prints the contents of the `groovy-generated.html` classpath resource to stdout. */
  public static void main(String[] args) throws IOException{
    String appClasspathResourceText = Foo.groovyGeneratedHtml();
    System.out.println("Contents of groovy-generated.html is " + appClasspathResourceText);
  }
}
6 changes: 6 additions & 0 deletions example/fundamentals/tasks/6-workers/build.mill
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ def compressBytes(input: Array[Byte]) = {
// 2. Classloaders containing plugin code, to avoid classpath conflicts while
// also avoiding classloading cost every time the code is executed
//
// NOTE: The _initialization_ of a `Task.Worker`'s value is single threaded,
// but _usage_ of the worker's value may be done concurrently. The user of
// `Task.Worker` is responsible for ensuring its value is safe to use in a
// multi-threaded environment via techniques like locks, atomics, or concurrent data
// structures
//
// Workers live as long as the Mill process. By default, consecutive `mill`
// commands in the same folder will re-use the same Mill process and workers,
// unless `--no-server` is passed which will terminate the Mill process and
Expand Down
2 changes: 1 addition & 1 deletion main/util/src/mill/util/Jvm.scala
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ object Jvm extends CoursierSupport {
method
}

def runInprocess[T](classPath: Agg[os.Path])(body: ClassLoader => T)(implicit
def runClassloader[T](classPath: Agg[os.Path])(body: ClassLoader => T)(implicit
ctx: mill.api.Ctx.Home
): T = {
inprocess(
Expand Down

0 comments on commit c0b6f18

Please sign in to comment.