diff --git a/docs/modules/ROOT/pages/extending/running-jvm-code.adoc b/docs/modules/ROOT/pages/extending/running-jvm-code.adoc index 30655331322..db72b59c336 100644 --- a/docs/modules/ROOT/pages/extending/running-jvm-code.adoc +++ b/docs/modules/ROOT/pages/extending/running-jvm-code.adoc @@ -30,4 +30,8 @@ include::partial$example/extending/jvmcode/1-subprocess.adoc[] == In-process Isolated Classloaders -include::partial$example/extending/jvmcode/2-inprocess.adoc[] +include::partial$example/extending/jvmcode/2-classloader.adoc[] + +== Classloader Worker Tasks + +include::partial$example/extending/jvmcode/3-worker.adoc[] \ No newline at end of file diff --git a/example/extending/jvmcode/2-inprocess/build.mill b/example/extending/jvmcode/2-classloader/build.mill similarity index 86% rename from example/extending/jvmcode/2-inprocess/build.mill rename to example/extending/jvmcode/2-classloader/build.mill index 835f682e191..7522a65b5ba 100644 --- a/example/extending/jvmcode/2-inprocess/build.mill +++ b/example/extending/jvmcode/2-classloader/build.mill @@ -14,7 +14,7 @@ object foo extends JavaModule { def groovyScript = Task.Source(millSourcePath / "generate.groovy") def groovyGeneratedResources = Task{ - Jvm.runInprocess(classPath = groovyClasspath().map(_.path)){ classLoader => + Jvm.runClassloader(classPath = groovyClasspath().map(_.path)){ classLoader => classLoader .loadClass("groovy.ui.GroovyMain") .getMethod("main", classOf[Array[String]]) @@ -34,7 +34,7 @@ object foo extends JavaModule { def resources = super.resources() ++ Seq(groovyGeneratedResources()) } -// Note that unlike `Jvm.runSubprocess`, `Jvm.runInprocess` does not take a `workingDir` +// Note that unlike `Jvm.runSubprocess`, `Jvm.runClassloader` does not take a `workingDir` // on `mainArgs`: it instead provides you an in-memory `classLoader` that contains the // classpath you gave it. 
From there, you can use `.loadClass` and `.getMethod` to fish out // the classes and methods you want, and `.invoke` to call them. @@ -45,7 +45,7 @@ object foo extends JavaModule { Contents of groovy-generated.html is

Hello!

Groovy!

*/ -// `Jvm.runInprocess` has significantly less overhead than `Jvm.runSubprocess`: both in terms +// `Jvm.runClassloader` has significantly less overhead than `Jvm.runSubprocess`: both in terms // of wall-clock time and in terms of memory footprint. However, it does have somewhat less // isolation, as the code is running inside your JVM and cannot be configured to have a separate // working directory, environment variables, and other process-global configs. Which one is diff --git a/example/extending/jvmcode/2-inprocess/foo/generate.groovy b/example/extending/jvmcode/2-classloader/foo/generate.groovy similarity index 100% rename from example/extending/jvmcode/2-inprocess/foo/generate.groovy rename to example/extending/jvmcode/2-classloader/foo/generate.groovy diff --git a/example/extending/jvmcode/2-inprocess/foo/src/Foo.java b/example/extending/jvmcode/2-classloader/foo/src/Foo.java similarity index 100% rename from example/extending/jvmcode/2-inprocess/foo/src/Foo.java rename to example/extending/jvmcode/2-classloader/foo/src/Foo.java diff --git a/example/extending/jvmcode/3-worker/bar/generate.groovy b/example/extending/jvmcode/3-worker/bar/generate.groovy new file mode 100644 index 00000000000..e3813e24eb5 --- /dev/null +++ b/example/extending/jvmcode/3-worker/bar/generate.groovy @@ -0,0 +1,4 @@ +def htmlContent = "

Hello!

" + args[0] + "

" + +def outputFile = new File(args[1]) +outputFile.write(htmlContent) \ No newline at end of file diff --git a/example/extending/jvmcode/3-worker/bar/src/Bar.java b/example/extending/jvmcode/3-worker/bar/src/Bar.java new file mode 100644 index 00000000000..008500d6cd2 --- /dev/null +++ b/example/extending/jvmcode/3-worker/bar/src/Bar.java @@ -0,0 +1,20 @@ +package bar; + +import java.io.IOException; +import java.io.InputStream; + +public class Bar { + + // Read `groovy-generated.html` from classpath + public static String groovyGeneratedHtml() throws IOException { + // Get the resource as an InputStream + try (InputStream inputStream = Bar.class.getClassLoader().getResourceAsStream("groovy-generated.html")) { + return new String(inputStream.readAllBytes()); + } + } + + public static void main(String[] args) throws IOException{ + String appClasspathResourceText = Bar.groovyGeneratedHtml(); + System.out.println("Contents of groovy-generated.html is " + appClasspathResourceText); + } +} diff --git a/example/extending/jvmcode/3-worker/build.mill b/example/extending/jvmcode/3-worker/build.mill new file mode 100644 index 00000000000..3e5f4cf92b5 --- /dev/null +++ b/example/extending/jvmcode/3-worker/build.mill @@ -0,0 +1,85 @@ +// Although running JVM bytecode via a one-off isolated classloader has less overhead +// than running it in a subprocess, the fact that the classloader needs to be created +// each time adds overhead: newly-created classloaders contain code that is not yet +// optimized by the JVM. 
When performance matters, you can put the classloader in a +// `Task.Worker` to keep it around, allowing the code internally to be optimized and +// stay optimized without being thrown away each time. + +// This example is similar to the earlier example running the Groovy interpreter in +// a subprocess, but instead of using `Jvm.runSubprocess` we use a `Task.Worker` to +// load the Groovy interpreter classpath files into a long-lived in-memory in-process classloader: + +package build +import mill._, javalib._ +import mill.util.Jvm + +object coursierModule extends CoursierModule + +def groovyClasspath: Task[Agg[PathRef]] = Task{ + coursierModule.defaultResolver().resolveDeps(Agg(ivy"org.codehaus.groovy:groovy:3.0.9")) +} + +def groovyWorker: Worker[java.net.URLClassLoader] = Task.Worker{ + mill.api.ClassLoader.create(groovyClasspath().map(_.path.toIO.toURL).toSeq, parent = null) +} + +trait GroovyGenerateJavaModule extends JavaModule { + def groovyScript = Task.Source(millSourcePath / "generate.groovy") + + def groovyGeneratedResources = Task{ + val oldCl = Thread.currentThread().getContextClassLoader + Thread.currentThread().setContextClassLoader(groovyWorker()) + try { + groovyWorker() + .loadClass("groovy.ui.GroovyMain") + .getMethod("main", classOf[Array[String]]) + .invoke( + null, + Array[String]( + groovyScript().path.toString, + groovyGenerateArg(), + (Task.dest / "groovy-generated.html").toString + ) + ) + } finally Thread.currentThread().setContextClassLoader(oldCl) + PathRef(Task.dest) + } + + def groovyGenerateArg: T[String] + def resources = super.resources() ++ Seq(groovyGeneratedResources()) +} + +object foo extends GroovyGenerateJavaModule{ + def groovyGenerateArg = "Foo Groovy!" +} +object bar extends GroovyGenerateJavaModule{ + def groovyGenerateArg = "Bar Groovy!" +} + +// Here we have two modules `foo` and `bar`, each of which makes use of `groovyWorker` +// to evaluate a groovy script to generate some resources. 
In this case, we invoke the `main` +// method of `groovy.ui.GroovyMain`, which also happens to require us to set the +// `ContextClassLoader` to work. + + +/** Usage + +> ./mill foo.run +Contents of groovy-generated.html is

Hello!

Foo Groovy!

+ +> ./mill bar.run +Contents of groovy-generated.html is

Hello!

Bar Groovy!

+*/ + + +// Because the `URLClassLoader` within `groovyWorker` is long-lived, the code within the +// classloader can be optimized by the JVM runtime, and would have less overhead than if +// run in separate classloaders via `Jvm.runClassloader`. And because `URLClassLoader` +// already extends `AutoCloseable`, `groovyWorker` gets treated as an +// xref:fundamentals/tasks.adoc#_autoclosable_workers[Autocloseable Worker] automatically. + +// NOTE: As mentioned in documentation for xref:fundamentals/tasks.adoc#_workers[Worker Tasks], +// the classloader contained within `groovyWorker` above is *initialized* in a single thread, +// but it may be *used* concurrently in a multi-threaded environment. Practically, that means +// that the classes and methods you are invoking within the classloader must not make use of +// un-synchronized global mutable variables. diff --git a/example/extending/jvmcode/3-worker/foo/generate.groovy b/example/extending/jvmcode/3-worker/foo/generate.groovy new file mode 100644 index 00000000000..e3813e24eb5 --- /dev/null +++ b/example/extending/jvmcode/3-worker/foo/generate.groovy @@ -0,0 +1,4 @@ +def htmlContent = "

Hello!

" + args[0] + "

" + +def outputFile = new File(args[1]) +outputFile.write(htmlContent) \ No newline at end of file diff --git a/example/extending/jvmcode/3-worker/foo/src/Foo.java b/example/extending/jvmcode/3-worker/foo/src/Foo.java new file mode 100644 index 00000000000..91a18ebb3b9 --- /dev/null +++ b/example/extending/jvmcode/3-worker/foo/src/Foo.java @@ -0,0 +1,20 @@ +package foo; + +import java.io.IOException; +import java.io.InputStream; + +public class Foo { + + // Read `groovy-generated.html` from classpath + public static String groovyGeneratedHtml() throws IOException { + // Get the resource as an InputStream + try (InputStream inputStream = Foo.class.getClassLoader().getResourceAsStream("groovy-generated.html")) { + return new String(inputStream.readAllBytes()); + } + } + + public static void main(String[] args) throws IOException{ + String appClasspathResourceText = Foo.groovyGeneratedHtml(); + System.out.println("Contents of groovy-generated.html is " + appClasspathResourceText); + } +} diff --git a/example/fundamentals/tasks/6-workers/build.mill b/example/fundamentals/tasks/6-workers/build.mill index d202fab9491..059d6af938a 100644 --- a/example/fundamentals/tasks/6-workers/build.mill +++ b/example/fundamentals/tasks/6-workers/build.mill @@ -63,6 +63,12 @@ def compressBytes(input: Array[Byte]) = { // 2. Classloaders containing plugin code, to avoid classpath conflicts while // also avoiding classloading cost every time the code is executed // +// NOTE: The _initialization_ of a `Task.Worker`'s value is single threaded, +// but _usage_ of the worker's value may be done concurrently. The user of +// `Task.Worker` is responsible for ensuring its value is safe to use in a +// multi-threaded environment via techniques like locks, atomics, or concurrent data +// structures. +// // Workers live as long as the Mill process. 
By default, consecutive `mill` // commands in the same folder will re-use the same Mill process and workers, // unless `--no-server` is passed which will terminate the Mill process and diff --git a/main/util/src/mill/util/Jvm.scala b/main/util/src/mill/util/Jvm.scala index d759c4305a8..b599c1c1e5e 100644 --- a/main/util/src/mill/util/Jvm.scala +++ b/main/util/src/mill/util/Jvm.scala @@ -316,7 +316,7 @@ object Jvm extends CoursierSupport { method } - def runInprocess[T](classPath: Agg[os.Path])(body: ClassLoader => T)(implicit + def runClassloader[T](classPath: Agg[os.Path])(body: ClassLoader => T)(implicit ctx: mill.api.Ctx.Home ): T = { inprocess(