diff --git a/AnomalyDetection/Core/pom.xml b/AnomalyDetection/Core/pom.xml index 3b10e0e11..374cdeb6c 100644 --- a/AnomalyDetection/Core/pom.xml +++ b/AnomalyDetection/Core/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo-anomaly - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml AnomalyDetection-Core diff --git a/AnomalyDetection/LibLinear/pom.xml b/AnomalyDetection/LibLinear/pom.xml index 3494cc444..12712f101 100644 --- a/AnomalyDetection/LibLinear/pom.xml +++ b/AnomalyDetection/LibLinear/pom.xml @@ -20,7 +20,7 @@ org.tribuo tribuo-anomaly - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml AnomalyDetection-LibLinear diff --git a/AnomalyDetection/LibSVM/pom.xml b/AnomalyDetection/LibSVM/pom.xml index 44f45705f..131af27ab 100644 --- a/AnomalyDetection/LibSVM/pom.xml +++ b/AnomalyDetection/LibSVM/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo-anomaly - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml AnomalyDetection-LibSVM diff --git a/AnomalyDetection/pom.xml b/AnomalyDetection/pom.xml index 5eb42a852..8679ed8c1 100644 --- a/AnomalyDetection/pom.xml +++ b/AnomalyDetection/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml tribuo-anomaly diff --git a/Classification/Core/pom.xml b/Classification/Core/pom.xml index dadf3c4ff..d04e19721 100644 --- a/Classification/Core/pom.xml +++ b/Classification/Core/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo-classification - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml Classification-Core diff --git a/Classification/DecisionTree/pom.xml b/Classification/DecisionTree/pom.xml index 871e7714a..1b8dc5184 100644 --- a/Classification/DecisionTree/pom.xml +++ b/Classification/DecisionTree/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo-classification - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml Classification-Tree diff --git a/Classification/Experiments/pom.xml b/Classification/Experiments/pom.xml index 462b2d383..95f8d87f7 100644 --- a/Classification/Experiments/pom.xml +++ b/Classification/Experiments/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo-classification - 4.2.0-SNAPSHOT + 
4.2.0 ../pom.xml Classification-Experiments diff --git a/Classification/Explanations/pom.xml b/Classification/Explanations/pom.xml index 6fb30cf2f..cd3d3c185 100644 --- a/Classification/Explanations/pom.xml +++ b/Classification/Explanations/pom.xml @@ -20,7 +20,7 @@ org.tribuo tribuo-classification - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml Classification-Explanations diff --git a/Classification/LibLinear/pom.xml b/Classification/LibLinear/pom.xml index 6f7b9c254..3e3aaa6f8 100644 --- a/Classification/LibLinear/pom.xml +++ b/Classification/LibLinear/pom.xml @@ -20,7 +20,7 @@ org.tribuo tribuo-classification - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml Classification-LibLinear diff --git a/Classification/LibSVM/pom.xml b/Classification/LibSVM/pom.xml index bd98db140..8d828c91f 100644 --- a/Classification/LibSVM/pom.xml +++ b/Classification/LibSVM/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo-classification - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml Classification-LibSVM diff --git a/Classification/MultinomialNaiveBayes/pom.xml b/Classification/MultinomialNaiveBayes/pom.xml index c918de3f9..67de36e58 100644 --- a/Classification/MultinomialNaiveBayes/pom.xml +++ b/Classification/MultinomialNaiveBayes/pom.xml @@ -22,7 +22,7 @@ org.tribuo tribuo-classification - 4.2.0-SNAPSHOT + 4.2.0 Classification-MultinomialNaiveBayes tribuo-classification-mnnaivebayes diff --git a/Classification/SGD/pom.xml b/Classification/SGD/pom.xml index 79cc4d22f..5c7982c12 100644 --- a/Classification/SGD/pom.xml +++ b/Classification/SGD/pom.xml @@ -20,7 +20,7 @@ org.tribuo tribuo-classification - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml Classification-SGD diff --git a/Classification/XGBoost/pom.xml b/Classification/XGBoost/pom.xml index 640ef376f..a99eb6c6e 100644 --- a/Classification/XGBoost/pom.xml +++ b/Classification/XGBoost/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo-classification - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml Classification-XGBoost diff --git a/Classification/pom.xml b/Classification/pom.xml index 
10bc8f8f9..47d5f544f 100644 --- a/Classification/pom.xml +++ b/Classification/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml tribuo-classification diff --git a/Clustering/Core/pom.xml b/Clustering/Core/pom.xml index b9fe82fd0..490e39065 100644 --- a/Clustering/Core/pom.xml +++ b/Clustering/Core/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo-clustering - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml Clustering-Core diff --git a/Clustering/Hdbscan/pom.xml b/Clustering/Hdbscan/pom.xml index e6caaed43..4116dea84 100644 --- a/Clustering/Hdbscan/pom.xml +++ b/Clustering/Hdbscan/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo-clustering - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml Clustering-Hdbscan diff --git a/Clustering/KMeans/pom.xml b/Clustering/KMeans/pom.xml index 15f849d8e..155c1a350 100644 --- a/Clustering/KMeans/pom.xml +++ b/Clustering/KMeans/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo-clustering - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml Clustering-KMeans diff --git a/Clustering/pom.xml b/Clustering/pom.xml index 44bb0e371..bf1bcf32e 100644 --- a/Clustering/pom.xml +++ b/Clustering/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml tribuo-clustering diff --git a/Common/LibLinear/pom.xml b/Common/LibLinear/pom.xml index 5ff8d6252..d67fa2ccf 100644 --- a/Common/LibLinear/pom.xml +++ b/Common/LibLinear/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo-common - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml Common-LibLinear diff --git a/Common/LibSVM/pom.xml b/Common/LibSVM/pom.xml index 1dc9fc3ea..2dbdaca26 100644 --- a/Common/LibSVM/pom.xml +++ b/Common/LibSVM/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo-common - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml Common-LibSVM diff --git a/Common/NearestNeighbour/pom.xml b/Common/NearestNeighbour/pom.xml index cf458f45c..868e39e71 100644 --- a/Common/NearestNeighbour/pom.xml +++ b/Common/NearestNeighbour/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo-common - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml Common-NearestNeighbour diff 
--git a/Common/SGD/pom.xml b/Common/SGD/pom.xml index a94d2dfae..185f82117 100644 --- a/Common/SGD/pom.xml +++ b/Common/SGD/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo-common - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml Common-SGD diff --git a/Common/Trees/pom.xml b/Common/Trees/pom.xml index fc929a87f..d3c273237 100644 --- a/Common/Trees/pom.xml +++ b/Common/Trees/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo-common - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml Common-Tree diff --git a/Common/XGBoost/pom.xml b/Common/XGBoost/pom.xml index 2e6eb4034..693732895 100644 --- a/Common/XGBoost/pom.xml +++ b/Common/XGBoost/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo-common - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml Common-XGBoost diff --git a/Common/pom.xml b/Common/pom.xml index 252617bef..7f50f0297 100644 --- a/Common/pom.xml +++ b/Common/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml tribuo-common diff --git a/Core/pom.xml b/Core/pom.xml index 875a7d665..9abaaca27 100644 --- a/Core/pom.xml +++ b/Core/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml Core diff --git a/Data/pom.xml b/Data/pom.xml index d48a0d2e8..27373b730 100644 --- a/Data/pom.xml +++ b/Data/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml Data diff --git a/Interop/Core/pom.xml b/Interop/Core/pom.xml index e481c8461..53ad6fd6b 100644 --- a/Interop/Core/pom.xml +++ b/Interop/Core/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo-interop - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml Interop-Core diff --git a/Interop/OCI/pom.xml b/Interop/OCI/pom.xml index b479cad6e..6aa4e281e 100644 --- a/Interop/OCI/pom.xml +++ b/Interop/OCI/pom.xml @@ -20,7 +20,7 @@ org.tribuo tribuo-interop - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml OCI diff --git a/Interop/ONNX/pom.xml b/Interop/ONNX/pom.xml index 3c5023369..b89dc5a85 100644 --- a/Interop/ONNX/pom.xml +++ b/Interop/ONNX/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo-interop - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml Interop-ONNX diff 
--git a/Interop/Tensorflow/pom.xml b/Interop/Tensorflow/pom.xml index e04fc119d..d5c6fecf7 100644 --- a/Interop/Tensorflow/pom.xml +++ b/Interop/Tensorflow/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo-interop - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml Interop-Tensorflow diff --git a/Interop/pom.xml b/Interop/pom.xml index 56b0c0252..79e812d08 100644 --- a/Interop/pom.xml +++ b/Interop/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml tribuo-interop diff --git a/Json/pom.xml b/Json/pom.xml index dcf76472d..4c82a4355 100644 --- a/Json/pom.xml +++ b/Json/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo - 4.2.0-SNAPSHOT + 4.2.0 4.0.0 Json diff --git a/Math/pom.xml b/Math/pom.xml index 212fa37ab..af713ae8c 100644 --- a/Math/pom.xml +++ b/Math/pom.xml @@ -20,7 +20,7 @@ org.tribuo tribuo - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml Math diff --git a/MultiLabel/Core/pom.xml b/MultiLabel/Core/pom.xml index 8f70b4d8f..1c255d12f 100644 --- a/MultiLabel/Core/pom.xml +++ b/MultiLabel/Core/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo-multilabel - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml MultiLabel-Core diff --git a/MultiLabel/SGD/pom.xml b/MultiLabel/SGD/pom.xml index d9b45b398..3fdb28cbd 100644 --- a/MultiLabel/SGD/pom.xml +++ b/MultiLabel/SGD/pom.xml @@ -20,7 +20,7 @@ org.tribuo tribuo-multilabel - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml MultiLabel-SGD diff --git a/MultiLabel/pom.xml b/MultiLabel/pom.xml index e254bb591..074f61827 100644 --- a/MultiLabel/pom.xml +++ b/MultiLabel/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml tribuo-multilabel diff --git a/README.md b/README.md index 398aaf60e..58aecc830 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@

Tribuo Logo

-# Tribuo - A Java prediction library (v4.1) +# Tribuo - A Java prediction library (v4.2) [Tribuo](https://tribuo.org) is a machine learning library in Java that provides multi-class classification, regression, clustering, anomaly detection @@ -26,7 +26,8 @@ trainer. In the case of evaluations, this provenance information also includes the specific model used. Provenance information can be extracted as JSON, or serialised directly using Java serialisation. For production deployments, provenance information can be redacted and replaced with a hash to provide -model tracking through an external system. +model tracking through an external system. Many Tribuo models can be exported +in ONNX format for deployment in other languages, platforms or cloud services. Tribuo runs on Java 8+, and we test on LTS versions of Java along with the latest release. Tribuo itself is a pure Java library and is supported on all @@ -44,7 +45,7 @@ deployment. * [Library Architecture](docs/Architecture.md) * [Package Overview](docs/PackageOverview.md) -* Javadoc [4.1](https://tribuo.org/learn/4.1/javadoc/), [4.0](https://tribuo.org/learn/4.0/javadoc/) +* Javadoc [4.2](https://tribuo.org/learn/4.2/javadoc), [4.1](https://tribuo.org/learn/4.1/javadoc/), [4.0](https://tribuo.org/learn/4.0/javadoc/) * [Helper Programs](docs/HelperPrograms.md) * [Developer Documentation](docs/Internals.md) * [Roadmap](docs/Roadmap.md) @@ -54,11 +55,12 @@ deployment. Tutorial notebooks, including examples of Classification, Clustering, Regression, Anomaly Detection, TensorFlow, document classification, columnar -data loading, working with externally trained models, and the configuration -system, can be found in the [tutorials](tutorials). These use the -[IJava](https://github.com/SpencerPark/IJava) Jupyter notebook kernel, and -work with Java 10+. To convert the tutorials' code back to Java 8, in most -cases simply replace the `var` keyword with the appropriate types. 
+data loading, working with externally trained models, and the configuration +system, can be found in the [tutorials](tutorials). These use the +[IJava](https://github.com/SpencerPark/IJava) Jupyter notebook kernel, and work +with Java 10+, except the reproducibility tutorial which requires Java 17. To +convert the tutorials' code back to Java 8, in most cases simply replace the +`var` keyword with the appropriate types. ## Algorithms @@ -78,7 +80,7 @@ of prediction tasks: The ensembles and K-NN use a combination function to produce their output. These combiners are prediction task specific, but the ensemble & K-NN implementations are task agnostic. We provide voting and averaging combiners -for classification and regression tasks. +for multi-class classification, multi-label classification and regression tasks. ### Classification @@ -99,9 +101,10 @@ Tribuo has implementations or interfaces for: Tribuo also supplies a linear chain CRF for sequence classification tasks. This CRF is trained via SGD using any of Tribuo's gradient optimizers. -To explain classifier predictions there is an implementation of the LIME algorithm. Tribuo's -implementation allows the mixing of text and tabular data, along with the use of any sparse model -as an explainer (e.g., regression trees, lasso etc), however it does not support images. +To explain classifier predictions there is an implementation of the LIME +algorithm. Tribuo's implementation allows the mixing of text and tabular data, +along with the use of any sparse model as an explainer (e.g., regression trees, +lasso etc), however it does not support images. 
### Regression @@ -181,13 +184,13 @@ Maven: org.tribuo tribuo-all - 4.1.0 + 4.2.0 pom ``` or from Gradle: ```groovy -implementation ("org.tribuo:tribuo-all:4.1.0@pom") { +implementation ("org.tribuo:tribuo-all:4.2.0@pom") { transitive = true // for build.gradle (i.e., Groovy) // isTransitive = true // for build.gradle.kts (i.e., Kotlin) } @@ -201,12 +204,12 @@ interfaces link to libraries which use native code. Those interfaces (TensorFlow, ONNX Runtime and XGBoost) only run on supported platforms for the respective published binaries, and Tribuo has no control over which binaries are supplied. If you need support for a specific platform, reach out to the -maintainers of those projects. As of the 4.1 release these native packages -all provide x86\_64 binaries for Windows, macOS and Linux. It is also possible -to compile each package for macOS ARM64 (i.e., Apple Silicon), though there are -no binaries available on Maven Central for that platform. When developing -on an ARM platform you can select the `arm` profile in Tribuo's pom.xml to -disable the native library tests. +maintainers of those projects. As of the 4.1 release these native packages all +provide x86\_64 binaries for Windows, macOS and Linux. It is also possible to +compile each package for macOS ARM64 (i.e., Apple Silicon), though there are no +binaries available on Maven Central for that platform. When developing on an +ARM platform you can select the `arm` profile in Tribuo's pom.xml to disable +the native library tests. Individual jars are published for each Tribuo module. It is preferable to depend only on the modules necessary for the specific project. This prevents @@ -251,10 +254,12 @@ Tribuo is licensed under the [Apache 2.0 License](./LICENSE.txt). ## Release Notes: -- v4.1.0 - Added TensorFlow training support, a BERT feature extractor, ExtraTrees, K-Means++, many linear model & CRF performance improvements, new tutorials on TF and document classification. 
Many bug fixes & documentation improvements. -- v4.0.2 - Many bug fixes (CSVDataSource, JsonDataSource, RowProcessor, LibSVMTrainer, Evaluations, Regressor serialization). Improved javadoc and documentation. Added two new tutorials (columnar data and external models). -- v4.0.1 - Bugfix for CSVReader to cope with blank lines, added IDXDataSource to allow loading of native MNIST format data. -- v4.0.0 - Initial public release. +- [v4.2.0](https://github.com/oracle/tribuo/blob/main/docs/release-notes/tribuo-v4-2-release-notes.md) - Added factorization machines, classifier chains, HDBSCAN. Added ONNX export and OCI Data Science integration. Added reproducibility framework. Various other small fixes and improvements, including the regression fixes from v4.1.1. Filled out the remaining javadoc, added 4 new tutorials (onnx export, multi-label classification, reproducibility, hdbscan), expanded existing tutorials. +- [v4.1.1](https://github.com/oracle/tribuo/blob/main/docs/release-notes/tribuo-v4-1-1-release-notes.md) - Bug fixes for multi-output regression, multi-label evaluation, KMeans & KNN with SecurityManager, and an update to TF-Java 0.4.0. +- [v4.1.0](https://github.com/oracle/tribuo/blob/main/docs/release-notes/tribuo-v4-1-release-notes.md) - Added TensorFlow training support, a BERT feature extractor, ExtraTrees, K-Means++, many linear model & CRF performance improvements, new tutorials on TF and document classification. Many bug fixes & documentation improvements. +- [v4.0.2](https://github.com/oracle/tribuo/blob/main/docs/release-notes/tribuo-v4-0-2-release-notes.md) - Many bug fixes (CSVDataSource, JsonDataSource, RowProcessor, LibSVMTrainer, Evaluations, Regressor serialization). Improved javadoc and documentation. Added two new tutorials (columnar data and external models). 
+- [v4.0.1](https://github.com/oracle/tribuo/blob/main/docs/release-notes/tribuo-v4-0-1-release-notes.md) - Bugfix for CSVReader to cope with blank lines, added IDXDataSource to allow loading of native MNIST format data. +- [v4.0.0](https://github.com/oracle/tribuo/blob/main/docs/release-notes/tribuo-v4-0-release-notes.md) - Initial public release. - v3 - Added provenance system, the external model support and onnx integrations. - v2 - Expanded beyond a classification system, to support regression, clustering and multi-label classification. - v1 - Initial internal release. This release only supported multi-class classification. diff --git a/Regression/Core/pom.xml b/Regression/Core/pom.xml index cbcfa0534..bf0f2a7a3 100644 --- a/Regression/Core/pom.xml +++ b/Regression/Core/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo-regression - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml Regression-Core diff --git a/Regression/LibLinear/pom.xml b/Regression/LibLinear/pom.xml index 60522800d..ad1b51be8 100644 --- a/Regression/LibLinear/pom.xml +++ b/Regression/LibLinear/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo-regression - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml Regression-LibLinear diff --git a/Regression/LibSVM/pom.xml b/Regression/LibSVM/pom.xml index b64bd14be..a70121f19 100644 --- a/Regression/LibSVM/pom.xml +++ b/Regression/LibSVM/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo-regression - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml Regression-LibSVM diff --git a/Regression/RegressionTree/pom.xml b/Regression/RegressionTree/pom.xml index 0138b1bc7..040befb00 100644 --- a/Regression/RegressionTree/pom.xml +++ b/Regression/RegressionTree/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo-regression - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml Regression-Tree diff --git a/Regression/SGD/pom.xml b/Regression/SGD/pom.xml index 6544746bc..4b8c2a6ae 100644 --- a/Regression/SGD/pom.xml +++ b/Regression/SGD/pom.xml @@ -20,7 +20,7 @@ org.tribuo tribuo-regression - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml Regression-SGD diff --git 
a/Regression/SLM/pom.xml b/Regression/SLM/pom.xml index a0852bfed..96451d4d8 100644 --- a/Regression/SLM/pom.xml +++ b/Regression/SLM/pom.xml @@ -20,7 +20,7 @@ org.tribuo tribuo-regression - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml Regression-SLM diff --git a/Regression/XGBoost/pom.xml b/Regression/XGBoost/pom.xml index 292b90ae1..44b7ae1ba 100644 --- a/Regression/XGBoost/pom.xml +++ b/Regression/XGBoost/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo-regression - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml Regression-XGBoost diff --git a/Regression/pom.xml b/Regression/pom.xml index 9994f5463..d8012f4fb 100644 --- a/Regression/pom.xml +++ b/Regression/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml tribuo-regression diff --git a/Reproducibility/pom.xml b/Reproducibility/pom.xml index ce95e9256..03d4aa78e 100644 --- a/Reproducibility/pom.xml +++ b/Reproducibility/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo - 4.2.0-SNAPSHOT + 4.2.0 4.0.0 Reproducibility diff --git a/THIRD_PARTY_LICENSES.txt b/THIRD_PARTY_LICENSES.txt index d779c402d..359ab25aa 100644 --- a/THIRD_PARTY_LICENSES.txt +++ b/THIRD_PARTY_LICENSES.txt @@ -1943,7 +1943,7 @@ of the input file used when generating it. This code is not standalone and requires a support library to be linked with it. This support library is itself covered by the above license. -oci-java-sdk 2.11.1 - Dual licensed UPL/Apache 2.0 +oci-java-sdk 2.12.0 - Dual licensed UPL/Apache 2.0 Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved. 
diff --git a/Util/InformationTheory/pom.xml b/Util/InformationTheory/pom.xml index 8c5555f26..8e8810785 100644 --- a/Util/InformationTheory/pom.xml +++ b/Util/InformationTheory/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo-util - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml InformationTheory diff --git a/Util/ONNXExport/pom.xml b/Util/ONNXExport/pom.xml index 1462c062c..9302597d8 100644 --- a/Util/ONNXExport/pom.xml +++ b/Util/ONNXExport/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo-util - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml ONNXExport diff --git a/Util/Tokenization/pom.xml b/Util/Tokenization/pom.xml index 0a4badd6b..c21a59d6a 100644 --- a/Util/Tokenization/pom.xml +++ b/Util/Tokenization/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo-util - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml Tokenization diff --git a/Util/pom.xml b/Util/pom.xml index ec97f0ba1..c68645118 100644 --- a/Util/pom.xml +++ b/Util/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml tribuo-util diff --git a/distribution/pom.xml b/distribution/pom.xml index 146429d9f..4421cb376 100644 --- a/distribution/pom.xml +++ b/distribution/pom.xml @@ -21,7 +21,7 @@ org.tribuo tribuo - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml diff --git a/docs/Roadmap.md b/docs/Roadmap.md index 85f2cae2a..73dd4790a 100644 --- a/docs/Roadmap.md +++ b/docs/Roadmap.md @@ -96,4 +96,4 @@ in a Tribuo compatible interface, but the codebase isn't quite ready for release - Add more tutorials. - Tribuo 4.0.2 adds tutorials for external model loading and columnar data processing. - Tribuo 4.1 adds tutorials for TensorFlow and document classification. - - Tribuo 4.2 adds tutorials for multi-label classification, ONNX export, and model reproducibility. + - Tribuo 4.2 adds tutorials for multi-label classification, HDBSCAN clustering, ONNX export, and model reproducibility. 
diff --git a/pom.xml b/pom.xml index aa5b22859..524d52bca 100644 --- a/pom.xml +++ b/pom.xml @@ -20,7 +20,7 @@ 4.0.0 org.tribuo tribuo - 4.2.0-SNAPSHOT + 4.2.0 pom Core @@ -282,7 +282,7 @@ Utility Packages - org.tribuo.util.*:org.tribuo.tests + org.tribuo.util.*:org.tribuo.reproducibility:org.tribuo.tests @@ -339,7 +339,7 @@ Utility Packages - org.tribuo.util.* + org.tribuo.util.*:org.tribuo.reproducibility:org.tribuo.tests diff --git a/tests/pom.xml b/tests/pom.xml index 23ac752e8..56d1f6d15 100644 --- a/tests/pom.xml +++ b/tests/pom.xml @@ -21,7 +21,7 @@ tribuo org.tribuo - 4.2.0-SNAPSHOT + 4.2.0 ../pom.xml 4.0.0 diff --git a/tutorials/README.md b/tutorials/README.md index a51c8334e..becfc2e4b 100644 --- a/tutorials/README.md +++ b/tutorials/README.md @@ -14,7 +14,7 @@ The tutorials cover: - [Intro regression with wine-quality](regression-tribuo-v4.ipynb) - [Configuration files, provenance and feature transformations on MNIST](configuration-tribuo-v4.ipynb) - [Clustering with K-Means](clustering-tribuo-v4.ipynb) -- [Clustering with HDBSCAN*](clustering-hdbscan-tribuo-v4.ipynb) +- [Clustering with HDBSCAN\*](clustering-hdbscan-tribuo-v4.ipynb) - [Anomaly Detection with LibSVM](anomaly-tribuo-v4.ipynb) - [Multi-label classification with Classifier Chains](multi-label-tribuo-v4.ipynb) - [Loading columnar data](columnar-tribuo-v4.ipynb) diff --git a/tutorials/anomaly-tribuo-v4.ipynb b/tutorials/anomaly-tribuo-v4.ipynb index 24fe2e523..38b12d05d 100644 --- a/tutorials/anomaly-tribuo-v4.ipynb +++ b/tutorials/anomaly-tribuo-v4.ipynb @@ -19,7 +19,7 @@ "metadata": {}, "outputs": [], "source": [ - "%jars ./tribuo-anomaly-libsvm-4.2.0-SNAPSHOT-jar-with-dependencies.jar" + "%jars ./tribuo-anomaly-libsvm-4.2.0-jar-with-dependencies.jar" ] }, { @@ -111,7 +111,7 @@ "obj = 289.5926348816893, rho = 3.144570476807895\n", "nSV = 296, nBSV = 114\n", "\n", - "Training took (00:00:00:179)\n" + "Training took (00:00:00:147)\n" ] } ], @@ -210,7 +210,7 @@ "mimetype": 
"text/x-java-source", "name": "Java", "pygments_lexer": "java", - "version": "17+0" + "version": "17.0.1+12-LTS-39" } }, "nbformat": 4, diff --git a/tutorials/clustering-hdbscan-tribuo-v4.ipynb b/tutorials/clustering-hdbscan-tribuo-v4.ipynb index 5e26406d3..a87c72b7d 100644 --- a/tutorials/clustering-hdbscan-tribuo-v4.ipynb +++ b/tutorials/clustering-hdbscan-tribuo-v4.ipynb @@ -19,7 +19,7 @@ "metadata": {}, "outputs": [], "source": [ - "%jars ./tribuo-clustering-hdbscan-4.2.0-SNAPSHOT-jar-with-dependencies.jar\n", + "%jars ./tribuo-clustering-hdbscan-4.2.0-jar-with-dependencies.jar\n", "%jars ./xchart-3.8.1.jar" ] }, @@ -166,9 +166,9 @@ "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ - "BufferedImage@82b341e: type = 1 DirectColorModel: rmask=ff0000 gmask=ff00 bmask=ff amask=0 IntegerInterleavedRaster: width = 600 height = 400 #Bands = 3 xOff = 0 yOff = 0 dataOffset[0] 0" + "BufferedImage@5c4722f0: type = 1 DirectColorModel: rmask=ff0000 gmask=ff00 bmask=ff amask=0 IntegerInterleavedRaster: width = 600 height = 400 #Bands = 3 xOff = 0 yOff = 0 dataOffset[0] 0" ] }, "execution_count": 8, @@ -269,9 +269,9 @@ "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ - "BufferedImage@65e69c81: type = 1 DirectColorModel: rmask=ff0000 gmask=ff00 bmask=ff amask=0 IntegerInterleavedRaster: width = 600 height = 400 #Bands = 3 xOff = 0 yOff = 0 dataOffset[0] 0" + "BufferedImage@4d071d36: type = 1 DirectColorModel: rmask=ff0000 gmask=ff00 bmask=ff amask=0 IntegerInterleavedRaster: width = 600 height = 400 #Bands = 3 xOff = 0 yOff = 0 dataOffset[0] 0" ] }, "execution_count": 12, @@ -372,9 +372,9 @@ "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ - "BufferedImage@2aa283b8: type = 1 DirectColorModel: rmask=ff0000 gmask=ff00 bmask=ff amask=0 IntegerInterleavedRaster: width = 600 height = 400 #Bands = 3 xOff = 0 yOff = 0 dataOffset[0] 0" + "BufferedImage@1535978a: type = 1 DirectColorModel: 
rmask=ff0000 gmask=ff00 bmask=ff amask=0 IntegerInterleavedRaster: width = 600 height = 400 #Bands = 3 xOff = 0 yOff = 0 dataOffset[0] 0" ] }, "execution_count": 16, @@ -446,9 +446,9 @@ "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ - "BufferedImage@4c2e1b64: type = 1 DirectColorModel: rmask=ff0000 gmask=ff00 bmask=ff amask=0 IntegerInterleavedRaster: width = 600 height = 400 #Bands = 3 xOff = 0 yOff = 0 dataOffset[0] 0" + "BufferedImage@b571c2a: type = 1 DirectColorModel: rmask=ff0000 gmask=ff00 bmask=ff amask=0 IntegerInterleavedRaster: width = 600 height = 400 #Bands = 3 xOff = 0 yOff = 0 dataOffset[0] 0" ] }, "execution_count": 19, @@ -536,7 +536,7 @@ "mimetype": "text/x-java-source", "name": "Java", "pygments_lexer": "java", - "version": "11.0.12+8-LTS-237" + "version": "17.0.1+12-LTS-39" } }, "nbformat": 4, diff --git a/tutorials/clustering-tribuo-v4.ipynb b/tutorials/clustering-tribuo-v4.ipynb index 1d02b1e74..430bd36e9 100644 --- a/tutorials/clustering-tribuo-v4.ipynb +++ b/tutorials/clustering-tribuo-v4.ipynb @@ -4,9 +4,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Clustering Tutorial\n", + "# K-Means Clustering Tutorial\n", "\n", - "This guide will show how to use Tribuo’s clustering models to find clusters in a toy dataset drawn from a mixture of Gaussians. We'll look at Tribuo's K-Means implementation and also discuss how evaluation works for clustering tasks.\n", + "This guide will show how to use one of Tribuo’s clustering models to find clusters in a toy dataset drawn from a mixture of Gaussians. 
We'll look at Tribuo's K-Means implementation and also discuss how evaluation works for clustering tasks.\n", "\n", "## Setup\n", "\n", @@ -19,7 +19,7 @@ "metadata": {}, "outputs": [], "source": [ - "%jars ./tribuo-clustering-kmeans-4.2.0-SNAPSHOT-jar-with-dependencies.jar" + "%jars ./tribuo-clustering-kmeans-4.2.0-jar-with-dependencies.jar" ] }, { @@ -98,7 +98,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training with 5 clusters took (00:00:00:048)\n" + "Training with 5 clusters took (00:00:00:054)\n" ] } ], @@ -180,7 +180,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training with 5 clusters took (00:00:00:023)\n" + "Training with 5 clusters took (00:00:00:039)\n" ] } ], @@ -282,8 +282,8 @@ "data": { "text/plain": [ "Clustering Evaluation\n", - "Normalized MI = 0.8154291916732409\n", - "Adjusted MI = 0.8139169342020223" + "Normalized MI = 0.8154291916732408\n", + "Adjusted MI = 0.8139169342020222" ] }, "execution_count": 10, @@ -352,7 +352,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training with 5 clusters on 4 threads took (00:00:00:035)\n" + "Training with 5 clusters on 4 threads took (00:00:00:055)\n" ] } ], @@ -381,7 +381,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training with 20 clusters on 4 threads took (00:00:00:027)\n" + "Training with 20 clusters on 4 threads took (00:00:00:049)\n" ] } ], @@ -409,8 +409,8 @@ "data": { "text/plain": [ "Clustering Evaluation\n", - "Normalized MI = 0.8104463467727059\n", - "Adjusted MI = 0.8088941747451209" + "Normalized MI = 0.8104463467727057\n", + "Adjusted MI = 0.8088941747451207" ] }, "execution_count": 14, @@ -457,7 +457,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We see that the multi-threaded versions run in less time than the single threaded trainer, despite having 4 times the training data. The 20 centroid model has a tighter fit of the test data, though it is overparameterised. 
This is common in clustering tasks where it's hard to balance the model fitting with complexity. We'll look at adding more performance metrics so users can diagnose such issues in future releases. " + "We see that the multi-threaded versions run in about the same time as the single threaded trainer, but have 4 times the training data. The 20 centroid model has a tighter fit of the test data, though it is overparameterised. This is common in clustering tasks where it's hard to balance the model fitting with complexity. We'll look at adding more performance metrics so users can diagnose such issues in future releases. " ] }, { @@ -467,7 +467,7 @@ "## Conclusion\n", "We looked at clustering using Tribuo's K-Means implementation, experimented with different initialisations, and compared both the single-threaded and multi-threaded versions. Then we looked at the performance metrics available when there are ground truth clusterings.\n", "\n", - "We plan to further expand Tribuo's clustering functionality to incorporate other algorithms in the future. If you want to help, or have specific algorithmic requirements, file an issue on our [github page](https://github.com/oracle/tribuo)." + "We plan to further expand Tribuo's clustering functionality to incorporate other algorithms in the future, and added HDBSCAN in Tribuo v4.2. If you want to help, or have specific algorithmic requirements, file an issue on our [github page](https://github.com/oracle/tribuo)." 
] } ], @@ -483,7 +483,7 @@ "mimetype": "text/x-java-source", "name": "Java", "pygments_lexer": "java", - "version": "17+0" + "version": "17.0.1+12-LTS-39" } }, "nbformat": 4, diff --git a/tutorials/columnar-tribuo-v4.ipynb b/tutorials/columnar-tribuo-v4.ipynb index e81068fde..36782fff7 100644 --- a/tutorials/columnar-tribuo-v4.ipynb +++ b/tutorials/columnar-tribuo-v4.ipynb @@ -38,8 +38,8 @@ "metadata": {}, "outputs": [], "source": [ - "%jars ./tribuo-classification-experiments-4.2.0-SNAPSHOT-jar-with-dependencies.jar\n", - "%jars ./tribuo-json-4.2.0-SNAPSHOT-jar-with-dependencies.jar" + "%jars ./tribuo-classification-experiments-4.2.0-jar-with-dependencies.jar\n", + "%jars ./tribuo-json-4.2.0-jar-with-dependencies.jar" ] }, { @@ -568,7 +568,7 @@ "mimetype": "text/x-java-source", "name": "Java", "pygments_lexer": "java", - "version": "17-ea+22-1964" + "version": "17.0.1+12-LTS-39" } }, "nbformat": 4, diff --git a/tutorials/configuration-tribuo-v4.ipynb b/tutorials/configuration-tribuo-v4.ipynb index 0ab70c3d5..7ba4be122 100644 --- a/tutorials/configuration-tribuo-v4.ipynb +++ b/tutorials/configuration-tribuo-v4.ipynb @@ -32,8 +32,8 @@ "metadata": {}, "outputs": [], "source": [ - "%jars ./tribuo-classification-experiments-4.2.0-SNAPSHOT-jar-with-dependencies.jar\n", - "%jars ./tribuo-json-4.2.0-SNAPSHOT-jar-with-dependencies.jar" + "%jars ./tribuo-classification-experiments-4.2.0-jar-with-dependencies.jar\n", + "%jars ./tribuo-json-4.2.0-jar-with-dependencies.jar" ] }, { @@ -487,7 +487,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training logistic regression took (00:00:03:669)\n" + "Training logistic regression took (00:00:03:494)\n" ] } ], @@ -807,7 +807,7 @@ "\t\t\t\t\t\t\tfeaturesPath = /Users/apocock/Development/Tribuo/tutorials/train-images-idx3-ubyte.gz\n", "\t\t\t\t\t\t\tfeatures-file-modified-time = 2000-07-21T14:20:24-04:00\n", "\t\t\t\t\t\t\toutput-resource-hash = 3552534A0A558BBED6AED32B30C495CCA23D567EC52CAC8BE1A0730E8010255C\n", - 
"\t\t\t\t\t\t\tdatasource-creation-time = 2021-05-24T12:24:14.958637-04:00\n", + "\t\t\t\t\t\t\tdatasource-creation-time = 2021-12-18T20:07:51.388837-05:00\n", "\t\t\t\t\t\t\toutput-file-modified-time = 2000-07-21T14:20:27-04:00\n", "\t\t\t\t\t\t\tidx-feature-type = UBYTE\n", "\t\t\t\t\t\t\tfeatures-resource-hash = 440FCABF73CC546FA21475E81EA370265605F56BE210A4024D2CA8F203523609\n", @@ -819,7 +819,7 @@ "\t\t\t\t\tnum-examples = 60000\n", "\t\t\t\t\tnum-features = 717\n", "\t\t\t\t\tnum-outputs = 10\n", - "\t\t\t\t\ttribuo-version = 4.1.0\n", + "\t\t\t\t\ttribuo-version = 4.2.0\n", "\t\t\t\t)\n", "\t\t\ttrainer = LinearSGDTrainer(\n", "\t\t\t\t\tclass-name = org.tribuo.classification.sgd.linear.LinearSGDTrainer\n", @@ -839,17 +839,17 @@ "\t\t\t\t\t\t\tclass-name = org.tribuo.classification.sgd.objectives.LogMulticlass\n", "\t\t\t\t\t\t\thost-short-name = LabelObjective\n", "\t\t\t\t\t\t)\n", - "\t\t\t\t\ttribuo-version = 4.1.0\n", + "\t\t\t\t\ttribuo-version = 4.2.0\n", "\t\t\t\t\ttrain-invocation-count = 0\n", "\t\t\t\t\tis-sequence = false\n", "\t\t\t\t\thost-short-name = Trainer\n", "\t\t\t\t)\n", - "\t\t\ttrained-at = 2021-05-24T12:24:19.604718-04:00\n", + "\t\t\ttrained-at = 2021-12-18T20:07:55.508414-05:00\n", "\t\t\tinstance-values = Map{\n", "\t\t\t\treconfigured-model=true\n", "\t\t\t}\n", - "\t\t\ttribuo-version = 4.1.0\n", - "\t\t\tjava-version = 17-ea\n", + "\t\t\ttribuo-version = 4.2.0\n", + "\t\t\tjava-version = 17.0.1\n", "\t\t\tos-name = Mac OS X\n", "\t\t\tos-arch = x86_64\n", "\t\t)\n", @@ -864,7 +864,7 @@ "\t\t\t\t\tfeaturesPath = /Users/apocock/Development/Tribuo/tutorials/t10k-images-idx3-ubyte.gz\n", "\t\t\t\t\tfeatures-file-modified-time = 2000-07-21T14:19:56-04:00\n", "\t\t\t\t\toutput-resource-hash = F7AE60F92E00EC6DEBD23A6088C31DBD2371ECA3FFA0DEFAEFB259924204AEC6\n", - "\t\t\t\t\tdatasource-creation-time = 2021-05-24T12:24:03.396403-04:00\n", + "\t\t\t\t\tdatasource-creation-time = 2021-12-18T20:07:41.373899-05:00\n", 
"\t\t\t\t\toutput-file-modified-time = 2000-07-21T14:20:05-04:00\n", "\t\t\t\t\tidx-feature-type = UBYTE\n", "\t\t\t\t\tfeatures-resource-hash = 8D422C7B0A1C1C79245A5BCF07FE86E33EEAFEE792B84584AEC276F5A2DBC4E6\n", @@ -876,9 +876,9 @@ "\t\t\tnum-examples = 10000\n", "\t\t\tnum-features = 668\n", "\t\t\tnum-outputs = 10\n", - "\t\t\ttribuo-version = 4.1.0\n", + "\t\t\ttribuo-version = 4.2.0\n", "\t\t)\n", - "\ttribuo-version = 4.1.0\n", + "\ttribuo-version = 4.2.0\n", ")\n" ] } @@ -906,7 +906,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training transformed logistic regression took (00:00:05:252)\n" + "Training transformed logistic regression took (00:00:04:707)\n" ] } ], @@ -1113,7 +1113,7 @@ "mimetype": "text/x-java-source", "name": "Java", "pygments_lexer": "java", - "version": "17-ea+22-1964" + "version": "17.0.1+12-LTS-39" } }, "nbformat": 4, diff --git a/tutorials/document-classification-tribuo-v4.ipynb b/tutorials/document-classification-tribuo-v4.ipynb index 6bbf85e6c..ca1ffcfc6 100644 --- a/tutorials/document-classification-tribuo-v4.ipynb +++ b/tutorials/document-classification-tribuo-v4.ipynb @@ -49,8 +49,8 @@ "metadata": {}, "outputs": [], "source": [ - "%jars ./tribuo-classification-experiments-4.2.0-SNAPSHOT-jar-with-dependencies.jar\n", - "%jars ./tribuo-onnx-4.2.0-SNAPSHOT-jar-with-dependencies.jar" + "%jars ./tribuo-classification-experiments-4.2.0-jar-with-dependencies.jar\n", + "%jars ./tribuo-onnx-4.2.0-jar-with-dependencies.jar" ] }, { @@ -198,7 +198,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training the model on BoW features took (00:00:09:659)\n", + "Training the model on BoW features took (00:00:09:601)\n", "\n", "Class n tp fn fp recall prec f1\n", "soc.religion.christian 398 352 46 110 0.884 0.762 0.819\n", @@ -244,7 +244,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We got a macro F1 score of 79.1%, which is a fairly good starting point and it's roughly what other linear models get on this 
task (e.g., scikit-learn's text classification tutorial gets 76.9% macro F1 when using a similar multinomial Naive Bayes model)." + "We got a macro F1 score of 79.6%, which is a fairly good starting point and it's roughly what other linear models get on this task (e.g., scikit-learn's text classification tutorial gets 76.9% macro F1 when using a similar multinomial Naive Bayes model)." ] }, { @@ -291,7 +291,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training the model on Unigram features took (00:00:10:529)\n", + "Training the model on Unigram features took (00:00:09:146)\n", "\n", "Class n tp fn fp recall prec f1\n", "soc.religion.christian 398 362 36 88 0.910 0.804 0.854\n", @@ -381,7 +381,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training the model on Bigram features took (00:00:41:981)\n", + "Training the model on Bigram features took (00:00:43:790)\n", "\n", "Class n tp fn fp recall prec f1\n", "soc.religion.christian 398 331 67 57 0.832 0.853 0.842\n", @@ -480,7 +480,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training the model on TF-IDF features took (00:00:42:471)\n", + "Training the model on TF-IDF features took (00:00:45:063)\n", "\n", "Class n tp fn fp recall prec f1\n", "soc.religion.christian 398 350 48 183 0.879 0.657 0.752\n", @@ -570,7 +570,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training the model on hashed features took (00:00:24:289)\n", + "Training the model on hashed features took (00:00:23:354)\n", "\n", "Class n tp fn fp recall prec f1\n", "soc.religion.christian 398 306 92 125 0.769 0.710 0.738\n", @@ -662,7 +662,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training the model on trimmed TF-IDF features took (00:00:19:049)\n", + "Training the model on trimmed TF-IDF features took (00:00:19:928)\n", "\n", "Class n tp fn fp recall prec f1\n", "soc.religion.christian 398 337 61 93 0.847 0.784 0.814\n", @@ -707,7 +707,7 @@ "cell_type": "markdown", 
"metadata": {}, "source": [ - "As with the feature hashing above, this model trains more quickly because there is less data to process, but the speed improvement is more stubstantial as the number of features in each example is lower (because the hashing produces a denser example than trimming out infrequent features). Performance dropped slightly as compared to the TF-IDF model, but again it is around 10% of the parameters, with a corresponding reduction in memory and runtime in inference and training. Performance is improved over the hashing as we're not colliding features at random, we're simply removing ones which are infrequent. If a feature is infrequent we probably can't estimate the weight for it very well so it helps remove some of the noise.\n", + "As with the feature hashing above, this model trains more quickly because there is less data to process, but the speed improvement is more substantial as the number of features in each example is lower (because the hashing produces a denser example than trimming out infrequent features). Performance dropped slightly as compared to the TF-IDF model, but again it is around 10% of the parameters, with a corresponding reduction in memory and runtime in inference and training. Performance is improved over the hashing as we're not colliding features at random, we're simply removing ones which are infrequent. If a feature is infrequent we probably can't estimate the weight for it very well so it helps remove some of the noise.\n", "\n", "Choosing which one of feature hashing and trimming out infrequent features to apply is problem dependent. Feature hashing can work in denser feature spaces than trimming infrequent features, but both still require some amount of sparsity in the problem to have any useful effect. With text datasets then trimming the infrequent words/features is usually helpful." 
] @@ -750,7 +750,7 @@ "text": [ "bert training data size = 11314, number of features = 768, number of classes = 20\n", "bert testing data size = 7532, number of features = 768, number of classes = 20\n", - "Extracting features with BERT took (01:06:52:756)\n" + "Extracting features with BERT took (00:38:37:476)\n" ] } ], @@ -789,7 +789,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training a LR on BERT features took (00:00:08:960)\n", + "Training a LR on BERT features took (00:00:06:082)\n", "Class n tp fn fp recall prec f1\n", "soc.religion.christian 398 353 45 111 0.887 0.761 0.819\n", "rec.autos 396 332 64 99 0.838 0.770 0.803\n", @@ -880,7 +880,7 @@ "\t\t\tclass-name = org.tribuo.classification.LabelFactory\n", "\t\t)\n", "\tfile-modified-time = 2003-03-18T07:24:55-05:00\n", - "\tdatasource-creation-time = 2021-05-24T12:46:58.801385-04:00\n", + "\tdatasource-creation-time = 2021-12-18T20:50:57.169758-05:00\n", ")\n" ] } @@ -918,7 +918,7 @@ "mimetype": "text/x-java-source", "name": "Java", "pygments_lexer": "java", - "version": "17-ea+22-1964" + "version": "17.0.1+12-LTS-39" } }, "nbformat": 4, diff --git a/tutorials/external-models-tribuo-v4.ipynb b/tutorials/external-models-tribuo-v4.ipynb index f9cf61689..72d17fc4d 100644 --- a/tutorials/external-models-tribuo-v4.ipynb +++ b/tutorials/external-models-tribuo-v4.ipynb @@ -24,8 +24,8 @@ "metadata": {}, "outputs": [], "source": [ - "%jars tribuo-classification-experiments-4.2.0-SNAPSHOT-jar-with-dependencies.jar\n", - "%jars tribuo-onnx-4.2.0-SNAPSHOT-jar-with-dependencies.jar" + "%jars tribuo-classification-experiments-4.2.0-jar-with-dependencies.jar\n", + "%jars tribuo-onnx-4.2.0-jar-with-dependencies.jar" ] }, { @@ -469,7 +469,7 @@ "mimetype": "text/x-java-source", "name": "Java", "pygments_lexer": "java", - "version": "17+35-LTS-2724" + "version": "17.0.1+12-LTS-39" } }, "nbformat": 4, diff --git a/tutorials/irises-tribuo-v4.ipynb b/tutorials/irises-tribuo-v4.ipynb index fa10fe888..8cf09625f 
100644 --- a/tutorials/irises-tribuo-v4.ipynb +++ b/tutorials/irises-tribuo-v4.ipynb @@ -27,8 +27,8 @@ "metadata": {}, "outputs": [], "source": [ - "%jars ./tribuo-classification-experiments-4.2.0-SNAPSHOT-jar-with-dependencies.jar\n", - "%jars ./tribuo-json-4.2.0-SNAPSHOT-jar-with-dependencies.jar" + "%jars ./tribuo-classification-experiments-4.2.0-jar-with-dependencies.jar\n", + "%jars ./tribuo-json-4.2.0-jar-with-dependencies.jar" ] }, { @@ -410,7 +410,7 @@ "\t\t\tdataPath = /Users/apocock/Development/Tribuo/tutorials/bezdekIris.data\n", "\t\t\tresource-hash = 0FED2A99DB77EC533A62DC66894D3EC6DF3B58B6A8F3CF4A6B47E4086B7F97DC\n", "\t\t\tfile-modified-time = 1999-12-14T15:12:39-05:00\n", - "\t\t\tdatasource-creation-time = 2021-11-01T12:52:18.814629-04:00\n", + "\t\t\tdatasource-creation-time = 2021-12-18T20:31:02.286464-05:00\n", "\t\t\thost-short-name = DataSource\n", "\t\t)\n", "\ttrain-proportion = 0.7\n", @@ -462,7 +462,7 @@ "\t\t\tclass-name = org.tribuo.classification.sgd.objectives.LogMulticlass\n", "\t\t\thost-short-name = LabelObjective\n", "\t\t)\n", - "\ttribuo-version = 4.2.0-SNAPSHOT\n", + "\ttribuo-version = 4.2.0\n", "\ttrain-invocation-count = 0\n", "\tis-sequence = false\n", "\thost-short-name = Trainer\n", @@ -523,7 +523,7 @@ " \"tribuo-version\" : {\n", " \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n", " \"key\" : \"tribuo-version\",\n", - " \"value\" : \"4.2.0-SNAPSHOT\",\n", + " \"value\" : \"4.2.0\",\n", " \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n", " \"additional\" : \"\",\n", " \"is-reference\" : false\n", @@ -531,7 +531,7 @@ " \"java-version\" : {\n", " \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n", " \"key\" : \"java-version\",\n", - " \"value\" : \"17\",\n", + " \"value\" : \"17.0.1\",\n", " \"provenance-class\" : 
\"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n", " \"additional\" : \"\",\n", " \"is-reference\" : false\n", @@ -555,7 +555,7 @@ " \"trained-at\" : {\n", " \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n", " \"key\" : \"trained-at\",\n", - " \"value\" : \"2021-11-01T12:52:19.228195-04:00\",\n", + " \"value\" : \"2021-12-18T20:31:02.707624-05:00\",\n", " \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.DateTimeProvenance\",\n", " \"additional\" : \"\",\n", " \"is-reference\" : false\n", @@ -618,7 +618,7 @@ " \"tribuo-version\" : {\n", " \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n", " \"key\" : \"tribuo-version\",\n", - " \"value\" : \"4.2.0-SNAPSHOT\",\n", + " \"value\" : \"4.2.0\",\n", " \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n", " \"additional\" : \"\",\n", " \"is-reference\" : false\n", @@ -677,7 +677,7 @@ " \"tribuo-version\" : {\n", " \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n", " \"key\" : \"tribuo-version\",\n", - " \"value\" : \"4.2.0-SNAPSHOT\",\n", + " \"value\" : \"4.2.0\",\n", " \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n", " \"additional\" : \"\",\n", " \"is-reference\" : false\n", @@ -982,7 +982,7 @@ " \"datasource-creation-time\" : {\n", " \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n", " \"key\" : \"datasource-creation-time\",\n", - " \"value\" : \"2021-11-01T12:52:18.814629-04:00\",\n", + " \"value\" : \"2021-12-18T20:31:02.286464-05:00\",\n", " \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.DateTimeProvenance\",\n", " \"additional\" : \"\",\n", " \"is-reference\" : false\n", @@ -1443,7 +1443,7 @@ "name": "stdout", "output_type": "stream", "text": [ - 
"linear-sgd-model - Model(class-name=org.tribuo.classification.sgd.linear.LinearSGDModel,dataset=Dataset(class-name=org.tribuo.MutableDataset,datasource=SplitDataSourceProvenance(className=org.tribuo.evaluation.TrainTestSplitter,innerSourceProvenance=DataSource(class-name=org.tribuo.data.csv.CSVDataSource,headers=[sepalLength, sepalWidth, petalLength, petalWidth, species],rowProcessor=RowProcessor(class-name=org.tribuo.data.columnar.RowProcessor,metadataExtractors=[],fieldProcessorList=[FieldProcessor(class-name=org.tribuo.data.columnar.processors.field.DoubleFieldProcessor,fieldName=petalLength,onlyFieldName=true,throwOnInvalid=true,host-short-name=FieldProcessor), FieldProcessor(class-name=org.tribuo.data.columnar.processors.field.DoubleFieldProcessor,fieldName=petalWidth,onlyFieldName=true,throwOnInvalid=true,host-short-name=FieldProcessor), FieldProcessor(class-name=org.tribuo.data.columnar.processors.field.DoubleFieldProcessor,fieldName=sepalWidth,onlyFieldName=true,throwOnInvalid=true,host-short-name=FieldProcessor), 
FieldProcessor(class-name=org.tribuo.data.columnar.processors.field.DoubleFieldProcessor,fieldName=sepalLength,onlyFieldName=true,throwOnInvalid=true,host-short-name=FieldProcessor)],featureProcessors=[],responseProcessor=ResponseProcessor(class-name=org.tribuo.data.columnar.processors.response.FieldResponseProcessor,uppercase=false,fieldNames=[species],defaultValues=[],displayField=false,outputFactory=OutputFactory(class-name=org.tribuo.classification.LabelFactory),host-short-name=ResponseProcessor),weightExtractor=null,replaceNewlinesWithSpaces=true,regexMappingProcessors={},host-short-name=RowProcessor),quote=\",outputRequired=true,outputFactory=OutputFactory(class-name=org.tribuo.classification.LabelFactory),separator=,,dataPath=/Users/apocock/Development/Tribuo/tutorials/bezdekIris.data,resource-hash=SHA-256[0FED2A99DB77EC533A62DC66894D3EC6DF3B58B6A8F3CF4A6B47E4086B7F97DC],file-modified-time=1999-12-14T15:12:39-05:00,datasource-creation-time=2021-11-01T12:52:18.814629-04:00,host-short-name=DataSource),trainProportion=0.7,seed=1,size=150,isTrain=true),transformations=[],is-sequence=false,is-dense=true,num-examples=105,num-features=4,num-outputs=3,tribuo-version=4.2.0-SNAPSHOT),trainer=Trainer(class-name=org.tribuo.classification.sgd.linear.LogisticRegressionTrainer,seed=12345,minibatchSize=1,shuffle=true,epochs=5,optimiser=StochasticGradientOptimiser(class-name=org.tribuo.math.optimisers.AdaGrad,epsilon=0.1,initialLearningRate=1.0,initialValue=0.0,host-short-name=StochasticGradientOptimiser),loggingInterval=1000,objective=LabelObjective(class-name=org.tribuo.classification.sgd.objectives.LogMulticlass,host-short-name=LabelObjective),tribuo-version=4.2.0-SNAPSHOT,train-invocation-count=0,is-sequence=false,host-short-name=Trainer),trained-at=2021-11-01T12:52:19.228195-04:00,instance-values={},tribuo-version=4.2.0-SNAPSHOT,java-version=17,os-name=Mac OS X,os-arch=x86_64)\n" + "linear-sgd-model - 
Model(class-name=org.tribuo.classification.sgd.linear.LinearSGDModel,dataset=Dataset(class-name=org.tribuo.MutableDataset,datasource=SplitDataSourceProvenance(className=org.tribuo.evaluation.TrainTestSplitter,innerSourceProvenance=DataSource(class-name=org.tribuo.data.csv.CSVDataSource,headers=[sepalLength, sepalWidth, petalLength, petalWidth, species],rowProcessor=RowProcessor(class-name=org.tribuo.data.columnar.RowProcessor,metadataExtractors=[],fieldProcessorList=[FieldProcessor(class-name=org.tribuo.data.columnar.processors.field.DoubleFieldProcessor,fieldName=petalLength,onlyFieldName=true,throwOnInvalid=true,host-short-name=FieldProcessor), FieldProcessor(class-name=org.tribuo.data.columnar.processors.field.DoubleFieldProcessor,fieldName=petalWidth,onlyFieldName=true,throwOnInvalid=true,host-short-name=FieldProcessor), FieldProcessor(class-name=org.tribuo.data.columnar.processors.field.DoubleFieldProcessor,fieldName=sepalWidth,onlyFieldName=true,throwOnInvalid=true,host-short-name=FieldProcessor), 
FieldProcessor(class-name=org.tribuo.data.columnar.processors.field.DoubleFieldProcessor,fieldName=sepalLength,onlyFieldName=true,throwOnInvalid=true,host-short-name=FieldProcessor)],featureProcessors=[],responseProcessor=ResponseProcessor(class-name=org.tribuo.data.columnar.processors.response.FieldResponseProcessor,uppercase=false,fieldNames=[species],defaultValues=[],displayField=false,outputFactory=OutputFactory(class-name=org.tribuo.classification.LabelFactory),host-short-name=ResponseProcessor),weightExtractor=null,replaceNewlinesWithSpaces=true,regexMappingProcessors={},host-short-name=RowProcessor),quote=\",outputRequired=true,outputFactory=OutputFactory(class-name=org.tribuo.classification.LabelFactory),separator=,,dataPath=/Users/apocock/Development/Tribuo/tutorials/bezdekIris.data,resource-hash=SHA-256[0FED2A99DB77EC533A62DC66894D3EC6DF3B58B6A8F3CF4A6B47E4086B7F97DC],file-modified-time=1999-12-14T15:12:39-05:00,datasource-creation-time=2021-12-18T20:31:02.286464-05:00,host-short-name=DataSource),trainProportion=0.7,seed=1,size=150,isTrain=true),transformations=[],is-sequence=false,is-dense=true,num-examples=105,num-features=4,num-outputs=3,tribuo-version=4.2.0),trainer=Trainer(class-name=org.tribuo.classification.sgd.linear.LogisticRegressionTrainer,seed=12345,minibatchSize=1,shuffle=true,epochs=5,optimiser=StochasticGradientOptimiser(class-name=org.tribuo.math.optimisers.AdaGrad,epsilon=0.1,initialLearningRate=1.0,initialValue=0.0,host-short-name=StochasticGradientOptimiser),loggingInterval=1000,objective=LabelObjective(class-name=org.tribuo.classification.sgd.objectives.LogMulticlass,host-short-name=LabelObjective),tribuo-version=4.2.0,train-invocation-count=0,is-sequence=false,host-short-name=Trainer),trained-at=2021-12-18T20:31:02.707624-05:00,instance-values={},tribuo-version=4.2.0,java-version=17.0.1,os-name=Mac OS X,os-arch=x86_64)\n" ] } ], @@ -1468,12 +1468,12 @@ "output_type": "stream", "text": [ "{\n", - " \"tribuo-version\" : 
\"4.2.0-SNAPSHOT\",\n", + " \"tribuo-version\" : \"4.2.0\",\n", " \"dataset-provenance\" : {\n", " \"num-features\" : \"4\",\n", " \"num-examples\" : \"45\",\n", " \"num-outputs\" : \"3\",\n", - " \"tribuo-version\" : \"4.2.0-SNAPSHOT\",\n", + " \"tribuo-version\" : \"4.2.0\",\n", " \"datasource\" : {\n", " \"train-proportion\" : \"0.7\",\n", " \"seed\" : \"1\",\n", @@ -1531,7 +1531,7 @@ " \"file-modified-time\" : \"1999-12-14T15:12:39-05:00\",\n", " \"quote\" : \"\\\"\",\n", " \"outputRequired\" : \"true\",\n", - " \"datasource-creation-time\" : \"2021-11-01T12:52:18.814629-04:00\",\n", + " \"datasource-creation-time\" : \"2021-12-18T20:31:02.286464-05:00\",\n", " \"outputFactory\" : {\n", " \"class-name\" : \"org.tribuo.classification.LabelFactory\"\n", " },\n", @@ -1551,11 +1551,11 @@ " \"class-name\" : \"org.tribuo.provenance.EvaluationProvenance\",\n", " \"model-provenance\" : {\n", " \"instance-values\" : { },\n", - " \"tribuo-version\" : \"4.2.0-SNAPSHOT\",\n", - " \"java-version\" : \"17\",\n", + " \"tribuo-version\" : \"4.2.0\",\n", + " \"java-version\" : \"17.0.1\",\n", " \"trainer\" : {\n", " \"seed\" : \"12345\",\n", - " \"tribuo-version\" : \"4.2.0-SNAPSHOT\",\n", + " \"tribuo-version\" : \"4.2.0\",\n", " \"minibatchSize\" : \"1\",\n", " \"train-invocation-count\" : \"0\",\n", " \"is-sequence\" : \"false\",\n", @@ -1577,13 +1577,13 @@ " }\n", " },\n", " \"os-arch\" : \"x86_64\",\n", - " \"trained-at\" : \"2021-11-01T12:52:19.228195-04:00\",\n", + " \"trained-at\" : \"2021-12-18T20:31:02.707624-05:00\",\n", " \"os-name\" : \"Mac OS X\",\n", " \"dataset\" : {\n", " \"num-features\" : \"4\",\n", " \"num-examples\" : \"105\",\n", " \"num-outputs\" : \"3\",\n", - " \"tribuo-version\" : \"4.2.0-SNAPSHOT\",\n", + " \"tribuo-version\" : \"4.2.0\",\n", " \"datasource\" : {\n", " \"train-proportion\" : \"0.7\",\n", " \"seed\" : \"1\",\n", @@ -1641,7 +1641,7 @@ " \"file-modified-time\" : \"1999-12-14T15:12:39-05:00\",\n", " \"quote\" : \"\\\"\",\n", " 
\"outputRequired\" : \"true\",\n", - " \"datasource-creation-time\" : \"2021-11-01T12:52:18.814629-04:00\",\n", + " \"datasource-creation-time\" : \"2021-12-18T20:31:02.286464-05:00\",\n", " \"outputFactory\" : {\n", " \"class-name\" : \"org.tribuo.classification.LabelFactory\"\n", " },\n", @@ -1802,7 +1802,7 @@ "mimetype": "text/x-java-source", "name": "Java", "pygments_lexer": "java", - "version": "17+35-LTS-2724" + "version": "17.0.1+12-LTS-39" } }, "nbformat": 4, diff --git a/tutorials/multi-label-tribuo-v4.ipynb b/tutorials/multi-label-tribuo-v4.ipynb index df14d7b7e..637cc1ea2 100644 --- a/tutorials/multi-label-tribuo-v4.ipynb +++ b/tutorials/multi-label-tribuo-v4.ipynb @@ -32,8 +32,8 @@ "metadata": {}, "outputs": [], "source": [ - "%jars ./tribuo-multilabel-sgd-4.2.0-SNAPSHOT-jar-with-dependencies.jar\n", - "%jars ./tribuo-classification-experiments-4.2.0-SNAPSHOT-jar-with-dependencies.jar" + "%jars ./tribuo-multilabel-sgd-4.2.0-jar-with-dependencies.jar\n", + "%jars ./tribuo-classification-experiments-4.2.0-jar-with-dependencies.jar" ] }, { @@ -172,7 +172,7 @@ "output_type": "stream", "text": [ "\n", - "Linear model training took (00:00:00:206)\n" + "Linear model training took (00:00:00:245)\n" ] } ], @@ -201,7 +201,7 @@ "output_type": "stream", "text": [ "\n", - "Tree model training took (00:00:03:805)\n" + "Tree model training took (00:00:03:499)\n" ] } ], @@ -256,7 +256,7 @@ "output_type": "stream", "text": [ "\n", - "Linear model evaluation took (00:00:00:068)\n", + "Linear model evaluation took (00:00:00:073)\n", "Class n tp fn fp recall prec f1\n", "(LabelSet={12}) 683 677 6 230 0.991 0.746 0.852\n", "(LabelSet={13}) 13 0 13 0 0.000 0.000 0.000\n", @@ -307,7 +307,7 @@ "output_type": "stream", "text": [ "\n", - "Tree model evaluation took (00:00:00:086)\n", + "Tree model evaluation took (00:00:00:085)\n", "Class n tp fn fp recall prec f1\n", "(LabelSet={12}) 683 607 76 201 0.889 0.751 0.814\n", "(LabelSet={13}) 13 0 13 2 0.000 0.000 0.000\n", @@ -387,8 
+387,8 @@ "output_type": "stream", "text": [ "\n", - "Classifier Chain model training took (00:00:03:344)\n", - "Classifier Chain model evaluation took (00:00:00:152)\n", + "Classifier Chain model training took (00:00:03:195)\n", + "Classifier Chain model evaluation took (00:00:00:146)\n", "Class n tp fn fp recall prec f1\n", "(LabelSet={12}) 683 616 67 203 0.902 0.752 0.820\n", "(LabelSet={13}) 13 0 13 2 0.000 0.000 0.000\n", @@ -448,8 +448,8 @@ "output_type": "stream", "text": [ "\n", - "Classifier Chain Ensemble model training took (00:01:05:893)\n", - "Classifier Chain Ensemble model evaluation took (00:00:02:475)\n", + "Classifier Chain Ensemble model training took (00:01:04:418)\n", + "Classifier Chain Ensemble model evaluation took (00:00:02:474)\n", "Class n tp fn fp recall prec f1\n", "(LabelSet={12}) 683 629 54 216 0.921 0.744 0.823\n", "(LabelSet={13}) 13 0 13 1 0.000 0.000 0.000\n", @@ -494,7 +494,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "As expected the classifier chain ensemble outperformed the binary relevance model and the single classifier chain when using trees as the base learner, at the cost of the greatest runtime. It did this by significantly decreasing the number of false positives, at the cost of a small increase in false negatives. We didn't quite beat the performance of the linear model in terms of Jaccard score, but in general classifier chains are a powerful multi-label approach, and we could always use a the linear model as a base learner (and if you do, then you do improve the Jaccard score above 0.497). We leave the implementation of that as an exercise for the reader.\n", + "As expected the classifier chain ensemble outperformed the binary relevance model and the single classifier chain when using trees as the base learner, at the cost of the greatest runtime. It did this by significantly decreasing the number of false positives, at the cost of a small increase in false negatives. 
We didn't quite beat the performance of the linear model in terms of Jaccard score, but in general classifier chains are a powerful multi-label approach, and we could always use the linear model as a base learner (and if you do, then you do improve the Jaccard score above 0.497). We leave the implementation of that as an exercise for the reader.\n", "\n", "## Conclusion\n", "\n", @@ -514,7 +514,7 @@ "mimetype": "text/x-java-source", "name": "Java", "pygments_lexer": "java", - "version": "11.0.10+8-LTS-162" + "version": "17.0.1+12-LTS-39" } }, "nbformat": 4, diff --git a/tutorials/onnx-export-tribuo-v4.ipynb b/tutorials/onnx-export-tribuo-v4.ipynb index 77e25c14b..29fee8df0 100644 --- a/tutorials/onnx-export-tribuo-v4.ipynb +++ b/tutorials/onnx-export-tribuo-v4.ipynb @@ -37,10 +37,10 @@ "metadata": {}, "outputs": [], "source": [ - "%jars ./tribuo-classification-experiments-4.2.0-SNAPSHOT-jar-with-dependencies.jar\n", - "%jars ./tribuo-oci-4.2.0-SNAPSHOT-jar-with-dependencies.jar\n", - "%jars ./tribuo-onnx-4.2.0-SNAPSHOT-jar-with-dependencies.jar\n", - "%jars ./tribuo-json-4.2.0-SNAPSHOT-jar-with-dependencies.jar" + "%jars ./tribuo-classification-experiments-4.2.0-jar-with-dependencies.jar\n", + "%jars ./tribuo-oci-4.2.0-jar-with-dependencies.jar\n", + "%jars ./tribuo-onnx-4.2.0-jar-with-dependencies.jar\n", + "%jars ./tribuo-json-4.2.0-jar-with-dependencies.jar" ] }, { @@ -150,7 +150,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training factorization machine took (00:00:18:816)\n" + "Training factorization machine took (00:00:11:305)\n" ] } ], @@ -177,7 +177,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Scoring factorization machine took (00:00:00:475)\n", + "Scoring factorization machine took (00:00:00:412)\n", "Class n tp fn fp recall prec f1\n", "0 980 959 21 31 0.979 0.969 0.974\n", "1 1,135 1,120 15 22 0.987 0.981 0.984\n", @@ -352,7 +352,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Scoring ONNX factorization 
machine took (00:00:01:578)\n", + "Scoring ONNX factorization machine took (00:00:00:810)\n", "Class n tp fn fp recall prec f1\n", "0 980 959 21 31 0.979 0.969 0.974\n", "1 1,135 1,120 15 22 0.987 0.981 0.984\n", @@ -422,7 +422,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "An important part of a Tribuo model is the provenance. We don't want to lose that information when exporting models to ONNX format, so we encode the provenance in the ONNX protobuf. It uses the marshalled provenance format from OLCUT, and the protos are available in OLCUT so they could be parsed in other systems. As a result when loading in a Tribuo-exported ONNX model the `ONNXExternalModel` class has two provenance objects, one for the `ONNXExternalModel` itself, and one for the original Model object.\n", + "An important part of a Tribuo model is the provenance. We don't want to lose that information when exporting models to ONNX format, so we encode the provenance in the ONNX protobuf. It uses the marshalled provenance format from OLCUT, and the protos are available in OLCUT so they could be parsed in other systems. As a result when loading in a Tribuo-exported ONNX model the `ONNXExternalModel` class has two provenance objects, one for the `ONNXExternalModel` itself, and one for the original `Model` object.\n", "\n", "Let's examine both of these provenances. 
First the one for the `ONNXExternalModel`:" ] @@ -446,7 +446,7 @@ "\t\t\t\t\toutputFactory = LabelFactory(\n", "\t\t\t\t\t\t\tclass-name = org.tribuo.classification.LabelFactory\n", "\t\t\t\t\t\t)\n", - "\t\t\t\t\tdatasource-creation-time = 2021-12-14T15:03:45.571121-05:00\n", + "\t\t\t\t\tdatasource-creation-time = 2021-12-18T20:36:37.266127-05:00\n", "\t\t\t\t)\n", "\t\t\ttransformations = List[]\n", "\t\t\tis-sequence = false\n", @@ -454,25 +454,25 @@ "\t\t\tnum-examples = -1\n", "\t\t\tnum-features = 717\n", "\t\t\tnum-outputs = 10\n", - "\t\t\ttribuo-version = 4.2.0-SNAPSHOT\n", + "\t\t\ttribuo-version = 4.2.0\n", "\t\t)\n", "\ttrainer = Trainer(\n", "\t\t\tclass-name = org.tribuo.Trainer\n", - "\t\t\tfileModifiedTime = 2021-12-14T15:03:44.423-05:00\n", - "\t\t\tmodelHash = 5934A79EA0B7A569DF2A42F08BE2DBED1C3E7D25A90C33D811D77502AEEFA431\n", + "\t\t\tfileModifiedTime = 2021-12-18T20:36:36.445-05:00\n", + "\t\t\tmodelHash = 06071247AEDE7539B899A2D530508D8E2B43304B8A7884A257368AA2CF1C18ED\n", "\t\t\tlocation = file:/Users/apocock/Development/Tribuo/tutorials/./fm-mnist.onnx\n", "\t\t)\n", - "\ttrained-at = 2021-12-14T15:03:45.568284-05:00\n", + "\ttrained-at = 2021-12-18T20:36:37.263832-05:00\n", "\tinstance-values = Map{\n", "\t\tmodel-domain=org.tribuo.tutorials.onnxexport.fm\n", "\t\tmodel-graphname=FMClassificationModel\n", - "\t\tmodel-description=factorization-machine-model - 
Model(class-name=org.tribuo.classification.sgd.fm.FMClassificationModel,dataset=Dataset(class-name=org.tribuo.MutableDataset,datasource=DataSource(class-name=org.tribuo.datasource.IDXDataSource,outputPath=/Users/apocock/Development/Tribuo/tutorials/train-labels-idx1-ubyte.gz,outputFactory=OutputFactory(class-name=org.tribuo.classification.LabelFactory),featuresPath=/Users/apocock/Development/Tribuo/tutorials/train-images-idx3-ubyte.gz,features-file-modified-time=2000-07-21T14:20:24-04:00,output-resource-hash=SHA-256[3552534A0A558BBED6AED32B30C495CCA23D567EC52CAC8BE1A0730E8010255C],datasource-creation-time=2021-12-14T15:03:23.159717-05:00,output-file-modified-time=2000-07-21T14:20:27-04:00,idx-feature-type=UBYTE,features-resource-hash=SHA-256[440FCABF73CC546FA21475E81EA370265605F56BE210A4024D2CA8F203523609],host-short-name=DataSource),transformations=[],is-sequence=false,is-dense=false,num-examples=60000,num-features=717,num-outputs=10,tribuo-version=4.2.0-SNAPSHOT),trainer=Trainer(class-name=org.tribuo.classification.sgd.fm.FMClassificationTrainer,seed=12345,variance=0.1,minibatchSize=1,factorizedDimSize=6,shuffle=true,epochs=5,optimiser=StochasticGradientOptimiser(class-name=org.tribuo.math.optimisers.AdaGrad,epsilon=0.1,initialLearningRate=0.1,initialValue=0.0,host-short-name=StochasticGradientOptimiser),loggingInterval=30000,objective=LabelObjective(class-name=org.tribuo.classification.sgd.objectives.LogMulticlass,host-short-name=LabelObjective),tribuo-version=4.2.0-SNAPSHOT,train-invocation-count=0,is-sequence=false,host-short-name=Trainer),trained-at=2021-12-14T15:03:43.488204-05:00,instance-values={},tribuo-version=4.2.0-SNAPSHOT,java-version=11.0.10,os-name=Mac OS X,os-arch=x86_64)\n", + "\t\tmodel-description=factorization-machine-model - 
Model(class-name=org.tribuo.classification.sgd.fm.FMClassificationModel,dataset=Dataset(class-name=org.tribuo.MutableDataset,datasource=DataSource(class-name=org.tribuo.datasource.IDXDataSource,outputPath=/Users/apocock/Development/Tribuo/tutorials/train-labels-idx1-ubyte.gz,outputFactory=OutputFactory(class-name=org.tribuo.classification.LabelFactory),featuresPath=/Users/apocock/Development/Tribuo/tutorials/train-images-idx3-ubyte.gz,features-file-modified-time=2000-07-21T14:20:24-04:00,output-resource-hash=SHA-256[3552534A0A558BBED6AED32B30C495CCA23D567EC52CAC8BE1A0730E8010255C],datasource-creation-time=2021-12-18T20:36:23.109293-05:00,output-file-modified-time=2000-07-21T14:20:27-04:00,idx-feature-type=UBYTE,features-resource-hash=SHA-256[440FCABF73CC546FA21475E81EA370265605F56BE210A4024D2CA8F203523609],host-short-name=DataSource),transformations=[],is-sequence=false,is-dense=false,num-examples=60000,num-features=717,num-outputs=10,tribuo-version=4.2.0),trainer=Trainer(class-name=org.tribuo.classification.sgd.fm.FMClassificationTrainer,seed=12345,variance=0.1,minibatchSize=1,factorizedDimSize=6,shuffle=true,epochs=5,optimiser=StochasticGradientOptimiser(class-name=org.tribuo.math.optimisers.AdaGrad,epsilon=0.1,initialLearningRate=0.1,initialValue=0.0,host-short-name=StochasticGradientOptimiser),loggingInterval=30000,objective=LabelObjective(class-name=org.tribuo.classification.sgd.objectives.LogMulticlass,host-short-name=LabelObjective),tribuo-version=4.2.0,train-invocation-count=0,is-sequence=false,host-short-name=Trainer),trained-at=2021-12-18T20:36:35.640663-05:00,instance-values={},tribuo-version=4.2.0,java-version=17.0.1,os-name=Mac OS X,os-arch=x86_64)\n", "\t\tmodel-producer=Tribuo\n", "\t\tmodel-version=0\n", "\t\tinput-name=input\n", "\t}\n", - "\ttribuo-version = 4.2.0-SNAPSHOT\n", - "\tjava-version = 11.0.10\n", + "\ttribuo-version = 4.2.0\n", + "\tjava-version = 17.0.1\n", "\tos-name = Mac OS X\n", "\tos-arch = x86_64\n", ")\n" @@ -515,7 +515,7 @@ 
"\t\t\t\t\tfeaturesPath = /Users/apocock/Development/Tribuo/tutorials/train-images-idx3-ubyte.gz\n", "\t\t\t\t\tfeatures-file-modified-time = 2000-07-21T14:20:24-04:00\n", "\t\t\t\t\toutput-resource-hash = 3552534A0A558BBED6AED32B30C495CCA23D567EC52CAC8BE1A0730E8010255C\n", - "\t\t\t\t\tdatasource-creation-time = 2021-12-14T15:03:23.159717-05:00\n", + "\t\t\t\t\tdatasource-creation-time = 2021-12-18T20:36:23.109293-05:00\n", "\t\t\t\t\toutput-file-modified-time = 2000-07-21T14:20:27-04:00\n", "\t\t\t\t\tidx-feature-type = UBYTE\n", "\t\t\t\t\tfeatures-resource-hash = 440FCABF73CC546FA21475E81EA370265605F56BE210A4024D2CA8F203523609\n", @@ -527,7 +527,7 @@ "\t\t\tnum-examples = 60000\n", "\t\t\tnum-features = 717\n", "\t\t\tnum-outputs = 10\n", - "\t\t\ttribuo-version = 4.2.0-SNAPSHOT\n", + "\t\t\ttribuo-version = 4.2.0\n", "\t\t)\n", "\ttrainer = FMClassificationTrainer(\n", "\t\t\tclass-name = org.tribuo.classification.sgd.fm.FMClassificationTrainer\n", @@ -549,15 +549,15 @@ "\t\t\t\t\tclass-name = org.tribuo.classification.sgd.objectives.LogMulticlass\n", "\t\t\t\t\thost-short-name = LabelObjective\n", "\t\t\t\t)\n", - "\t\t\ttribuo-version = 4.2.0-SNAPSHOT\n", + "\t\t\ttribuo-version = 4.2.0\n", "\t\t\ttrain-invocation-count = 0\n", "\t\t\tis-sequence = false\n", "\t\t\thost-short-name = Trainer\n", "\t\t)\n", - "\ttrained-at = 2021-12-14T15:03:43.488204-05:00\n", + "\ttrained-at = 2021-12-18T20:36:35.640663-05:00\n", "\tinstance-values = Map{}\n", - "\ttribuo-version = 4.2.0-SNAPSHOT\n", - "\tjava-version = 11.0.10\n", + "\ttribuo-version = 4.2.0\n", + "\tjava-version = 17.0.1\n", "\tos-name = Mac OS X\n", "\tos-arch = x86_64\n", ")\n" @@ -648,7 +648,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Scoring ensemble took (00:00:00:880)\n", + "Scoring ensemble took (00:00:00:675)\n", "Class n tp fn fp recall prec f1\n", "0 980 965 15 43 0.985 0.957 0.971\n", "1 1,135 1,119 16 34 0.986 0.971 0.978\n", @@ -725,7 +725,7 @@ "name": "stdout", "output_type": 
"stream", "text": [ - "Scoring ONNX ensemble took (00:00:01:901)\n", + "Scoring ONNX ensemble took (00:00:01:021)\n", "Predictions are equal - true\n" ] } @@ -762,7 +762,7 @@ "source": [ "// Set these variables appropriately for your OCI account\n", "var compartmentID = \"your-oci-compartment-id\";\n", - "var projectID = \"your-oci-ds-project-id\";\n" + "var projectID = \"your-oci-ds-project-id\";" ] }, { @@ -944,7 +944,7 @@ "mimetype": "text/x-java-source", "name": "Java", "pygments_lexer": "java", - "version": "11.0.10+8-LTS-162" + "version": "17.0.1+12-LTS-39" } }, "nbformat": 4, diff --git a/tutorials/regression-tribuo-v4.ipynb b/tutorials/regression-tribuo-v4.ipynb index 292c6568c..02e8611eb 100644 --- a/tutorials/regression-tribuo-v4.ipynb +++ b/tutorials/regression-tribuo-v4.ipynb @@ -23,10 +23,10 @@ "metadata": {}, "outputs": [], "source": [ - "%jars ./tribuo-json-4.2.0-SNAPSHOT-jar-with-dependencies.jar\n", - "%jars ./tribuo-regression-sgd-4.2.0-SNAPSHOT-jar-with-dependencies.jar\n", - "%jars ./tribuo-regression-xgboost-4.2.0-SNAPSHOT-jar-with-dependencies.jar\n", - "%jars ./tribuo-regression-tree-4.2.0-SNAPSHOT-jar-with-dependencies.jar" + "%jars ./tribuo-json-4.2.0-jar-with-dependencies.jar\n", + "%jars ./tribuo-regression-sgd-4.2.0-jar-with-dependencies.jar\n", + "%jars ./tribuo-regression-xgboost-4.2.0-jar-with-dependencies.jar\n", + "%jars ./tribuo-regression-tree-4.2.0-jar-with-dependencies.jar" ] }, { @@ -264,7 +264,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training Linear Regression (SGD) took (00:00:00:123)\n", + "Training Linear Regression (SGD) took (00:00:00:070)\n", "Evaluation (train):\n", " RMSE 0.979522\n", " MAE 0.741870\n", @@ -357,7 +357,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training Linear Regression (AdaGrad) took (00:00:00:091)\n", + "Training Linear Regression (AdaGrad) took (00:00:00:041)\n", "Evaluation (train):\n", " RMSE 0.735311\n", " MAE 0.575096\n", @@ -403,7 +403,7 @@ "name": 
"stdout", "output_type": "stream", "text": [ - "Training CART took (00:00:00:060)\n", + "Training CART took (00:00:00:071)\n", "Evaluation (train):\n", " RMSE 0.544516\n", " MAE 0.405062\n", @@ -436,7 +436,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training XGBoost took (00:00:00:320)\n", + "Training XGBoost took (00:00:00:263)\n", "Evaluation (train):\n", " RMSE 0.143871\n", " MAE 0.097167\n", @@ -477,7 +477,7 @@ "mimetype": "text/x-java-source", "name": "Java", "pygments_lexer": "java", - "version": "17-ea+22-1964" + "version": "17.0.1+12-LTS-39" } }, "nbformat": 4, diff --git a/tutorials/reproducibility-tribuo-v4.ipynb b/tutorials/reproducibility-tribuo-v4.ipynb index a5c6a8565..2f90f645b 100644 --- a/tutorials/reproducibility-tribuo-v4.ipynb +++ b/tutorials/reproducibility-tribuo-v4.ipynb @@ -21,10 +21,10 @@ "metadata": {}, "outputs": [], "source": [ - "%jars ./tribuo-classification-experiments-4.2.0-SNAPSHOT-jar-with-dependencies.jar\n", - "%jars ./tribuo-onnx-4.2.0-SNAPSHOT-jar-with-dependencies.jar\n", - "%jars ./tribuo-json-4.2.0-SNAPSHOT-jar-with-dependencies.jar\n", - "%jars ./tribuo-reproducibility-4.2.0-SNAPSHOT.jar" + "%jars ./tribuo-classification-experiments-4.2.0-jar-with-dependencies.jar\n", + "%jars ./tribuo-onnx-4.2.0-jar-with-dependencies.jar\n", + "%jars ./tribuo-json-4.2.0-jar-with-dependencies.jar\n", + "%jars ./tribuo-reproducibility-4.2.0-jar-with-dependencies.jar" ] }, { @@ -68,7 +68,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "linear-sgd-model - Model(class-name=org.tribuo.classification.sgd.linear.LinearSGDModel,dataset=Dataset(class-name=org.tribuo.MutableDataset,datasource=SplitDataSourceProvenance(className=org.tribuo.evaluation.TrainTestSplitter,innerSourceProvenance=DataSource(class-name=org.tribuo.data.csv.CSVDataSource,headers=[sepalLength, sepalWidth, petalLength, petalWidth, 
species],rowProcessor=RowProcessor(class-name=org.tribuo.data.columnar.RowProcessor,metadataExtractors=[],fieldProcessorList=[FieldProcessor(class-name=org.tribuo.data.columnar.processors.field.DoubleFieldProcessor,fieldName=petalLength,onlyFieldName=true,throwOnInvalid=true,host-short-name=FieldProcessor), FieldProcessor(class-name=org.tribuo.data.columnar.processors.field.DoubleFieldProcessor,fieldName=petalWidth,onlyFieldName=true,throwOnInvalid=true,host-short-name=FieldProcessor), FieldProcessor(class-name=org.tribuo.data.columnar.processors.field.DoubleFieldProcessor,fieldName=sepalWidth,onlyFieldName=true,throwOnInvalid=true,host-short-name=FieldProcessor), FieldProcessor(class-name=org.tribuo.data.columnar.processors.field.DoubleFieldProcessor,fieldName=sepalLength,onlyFieldName=true,throwOnInvalid=true,host-short-name=FieldProcessor)],featureProcessors=[],responseProcessor=ResponseProcessor(class-name=org.tribuo.data.columnar.processors.response.FieldResponseProcessor,uppercase=false,fieldNames=[species],defaultValues=[],displayField=false,outputFactory=OutputFactory(class-name=org.tribuo.classification.LabelFactory),host-short-name=ResponseProcessor),weightExtractor=null,replaceNewlinesWithSpaces=true,regexMappingProcessors={},host-short-name=RowProcessor),quote=\",outputRequired=true,outputFactory=OutputFactory(class-name=org.tribuo.classification.LabelFactory),separator=,,dataPath=/Users/apocock/Development/Tribuo/tutorials/bezdekIris.data,resource-hash=SHA-256[0FED2A99DB77EC533A62DC66894D3EC6DF3B58B6A8F3CF4A6B47E4086B7F97DC],file-modified-time=1999-12-14T15:12:39-05:00,datasource-creation-time=2021-11-01T12:52:18.814629-04:00,host-short-name=DataSource),trainProportion=0.7,seed=1,size=150,isTrain=true),transformations=[],is-sequence=false,is-dense=true,num-examples=105,num-features=4,num-outputs=3,tribuo-version=4.2.0-SNAPSHOT),trainer=Trainer(class-name=org.tribuo.classification.sgd.linear.LogisticRegressionTrainer,seed=12345,minibatchSize=1,shuffle=tr
ue,epochs=5,optimiser=StochasticGradientOptimiser(class-name=org.tribuo.math.optimisers.AdaGrad,epsilon=0.1,initialLearningRate=1.0,initialValue=0.0,host-short-name=StochasticGradientOptimiser),loggingInterval=1000,objective=LabelObjective(class-name=org.tribuo.classification.sgd.objectives.LogMulticlass,host-short-name=LabelObjective),tribuo-version=4.2.0-SNAPSHOT,train-invocation-count=0,is-sequence=false,host-short-name=Trainer),trained-at=2021-11-01T12:52:19.228195-04:00,instance-values={},tribuo-version=4.2.0-SNAPSHOT,java-version=17,os-name=Mac OS X,os-arch=x86_64)\n" + "linear-sgd-model - Model(class-name=org.tribuo.classification.sgd.linear.LinearSGDModel,dataset=Dataset(class-name=org.tribuo.MutableDataset,datasource=SplitDataSourceProvenance(className=org.tribuo.evaluation.TrainTestSplitter,innerSourceProvenance=DataSource(class-name=org.tribuo.data.csv.CSVDataSource,headers=[sepalLength, sepalWidth, petalLength, petalWidth, species],rowProcessor=RowProcessor(class-name=org.tribuo.data.columnar.RowProcessor,metadataExtractors=[],fieldProcessorList=[FieldProcessor(class-name=org.tribuo.data.columnar.processors.field.DoubleFieldProcessor,fieldName=petalLength,onlyFieldName=true,throwOnInvalid=true,host-short-name=FieldProcessor), FieldProcessor(class-name=org.tribuo.data.columnar.processors.field.DoubleFieldProcessor,fieldName=petalWidth,onlyFieldName=true,throwOnInvalid=true,host-short-name=FieldProcessor), FieldProcessor(class-name=org.tribuo.data.columnar.processors.field.DoubleFieldProcessor,fieldName=sepalWidth,onlyFieldName=true,throwOnInvalid=true,host-short-name=FieldProcessor), 
FieldProcessor(class-name=org.tribuo.data.columnar.processors.field.DoubleFieldProcessor,fieldName=sepalLength,onlyFieldName=true,throwOnInvalid=true,host-short-name=FieldProcessor)],featureProcessors=[],responseProcessor=ResponseProcessor(class-name=org.tribuo.data.columnar.processors.response.FieldResponseProcessor,uppercase=false,fieldNames=[species],defaultValues=[],displayField=false,outputFactory=OutputFactory(class-name=org.tribuo.classification.LabelFactory),host-short-name=ResponseProcessor),weightExtractor=null,replaceNewlinesWithSpaces=true,regexMappingProcessors={},host-short-name=RowProcessor),quote=\",outputRequired=true,outputFactory=OutputFactory(class-name=org.tribuo.classification.LabelFactory),separator=,,dataPath=/Users/apocock/Development/Tribuo/tutorials/bezdekIris.data,resource-hash=SHA-256[0FED2A99DB77EC533A62DC66894D3EC6DF3B58B6A8F3CF4A6B47E4086B7F97DC],file-modified-time=1999-12-14T15:12:39-05:00,datasource-creation-time=2021-12-18T20:31:02.286464-05:00,host-short-name=DataSource),trainProportion=0.7,seed=1,size=150,isTrain=true),transformations=[],is-sequence=false,is-dense=true,num-examples=105,num-features=4,num-outputs=3,tribuo-version=4.2.0),trainer=Trainer(class-name=org.tribuo.classification.sgd.linear.LogisticRegressionTrainer,seed=12345,minibatchSize=1,shuffle=true,epochs=5,optimiser=StochasticGradientOptimiser(class-name=org.tribuo.math.optimisers.AdaGrad,epsilon=0.1,initialLearningRate=1.0,initialValue=0.0,host-short-name=StochasticGradientOptimiser),loggingInterval=1000,objective=LabelObjective(class-name=org.tribuo.classification.sgd.objectives.LogMulticlass,host-short-name=LabelObjective),tribuo-version=4.2.0,train-invocation-count=0,is-sequence=false,host-short-name=Trainer),trained-at=2021-12-18T20:31:02.707624-05:00,instance-values={},tribuo-version=4.2.0,java-version=17.0.1,os-name=Mac OS X,os-arch=x86_64)\n" ] } ], @@ -195,7 +195,7 @@ "\t\t\t\t\tdataPath = /Users/apocock/Development/Tribuo/tutorials/bezdekIris.data\n", 
"\t\t\t\t\tresource-hash = 0FED2A99DB77EC533A62DC66894D3EC6DF3B58B6A8F3CF4A6B47E4086B7F97DC\n", "\t\t\t\t\tfile-modified-time = 1999-12-14T15:12:39-05:00\n", - "\t\t\t\t\tdatasource-creation-time = 2021-11-03T09:51:39.561821-04:00\n", + "\t\t\t\t\tdatasource-creation-time = 2021-12-18T20:38:43.398834-05:00\n", "\t\t\t\t\thost-short-name = DataSource\n", "\t\t\t\t)\n", "\t\t\ttrain-proportion = 0.7\n", @@ -209,7 +209,7 @@ "\tnum-examples = 105\n", "\tnum-features = 4\n", "\tnum-outputs = 3\n", - "\ttribuo-version = 4.2.0-SNAPSHOT\n", + "\ttribuo-version = 4.2.0\n", ")\n" ] } @@ -254,7 +254,7 @@ "\t\t\tclass-name = org.tribuo.classification.sgd.objectives.LogMulticlass\n", "\t\t\thost-short-name = LabelObjective\n", "\t\t)\n", - "\ttribuo-version = 4.2.0-SNAPSHOT\n", + "\ttribuo-version = 4.2.0\n", "\ttrain-invocation-count = 0\n", "\tis-sequence = false\n", "\thost-short-name = Trainer\n", @@ -305,15 +305,15 @@ " \"datasource\" : {\n", " \"source\" : {\n", " \"datasource-creation-time\" : {\n", - " \"original\" : \"2021-11-01T12:52:18.814629-04:00\",\n", - " \"reproduced\" : \"2021-11-03T09:51:39.561821-04:00\"\n", + " \"original\" : \"2021-12-18T20:31:02.286464-05:00\",\n", + " \"reproduced\" : \"2021-12-18T20:38:43.398834-05:00\"\n", " }\n", " }\n", " }\n", " },\n", " \"trained-at\" : {\n", - " \"original\" : \"2021-11-01T12:52:19.228195-04:00\",\n", - " \"reproduced\" : \"2021-11-03T09:51:39.842601-04:00\"\n", + " \"original\" : \"2021-12-18T20:31:02.707624-05:00\",\n", + " \"reproduced\" : \"2021-12-18T20:38:43.655448-05:00\"\n", " }\n", "}\n" ] @@ -433,7 +433,7 @@ "\t\t\t\t\toutputFactory = LabelFactory(\n", "\t\t\t\t\t\t\tclass-name = org.tribuo.classification.LabelFactory\n", "\t\t\t\t\t\t)\n", - "\t\t\t\t\tdatasource-creation-time = 2021-11-03T09:51:50.151668-04:00\n", + "\t\t\t\t\tdatasource-creation-time = 2021-12-18T20:38:52.702297-05:00\n", "\t\t\t\t)\n", "\t\t\ttransformations = List[]\n", "\t\t\tis-sequence = false\n", @@ -441,25 +441,25 @@ 
"\t\t\tnum-examples = -1\n", "\t\t\tnum-features = 717\n", "\t\t\tnum-outputs = 10\n", - "\t\t\ttribuo-version = 4.2.0-SNAPSHOT\n", + "\t\t\ttribuo-version = 4.2.0\n", "\t\t)\n", "\ttrainer = Trainer(\n", "\t\t\tclass-name = org.tribuo.Trainer\n", - "\t\t\tfileModifiedTime = 2021-10-26T17:51:36.243-04:00\n", - "\t\t\tmodelHash = 8DD82B31BD7CFC1C520942590E173AED07AF33C97C32021EE94738FA9FF4CC89\n", + "\t\t\tfileModifiedTime = 2021-12-18T20:36:36.445-05:00\n", + "\t\t\tmodelHash = 06071247AEDE7539B899A2D530508D8E2B43304B8A7884A257368AA2CF1C18ED\n", "\t\t\tlocation = file:/Users/apocock/Development/Tribuo/tutorials/./fm-mnist.onnx\n", "\t\t)\n", - "\ttrained-at = 2021-11-03T09:51:50.149558-04:00\n", + "\ttrained-at = 2021-12-18T20:38:52.700329-05:00\n", "\tinstance-values = Map{\n", "\t\tmodel-domain=org.tribuo.tutorials.onnxexport.fm\n", "\t\tmodel-graphname=FMClassificationModel\n", - "\t\tmodel-description=factorization-machine-model - Model(class-name=org.tribuo.classification.sgd.fm.FMClassificationModel,dataset=Dataset(class-name=org.tribuo.MutableDataset,datasource=DataSource(class-name=org.tribuo.datasource.IDXDataSource,outputPath=/Users/apocock/Development/Tribuo/tutorials/train-labels-idx1-ubyte.gz,outputFactory=OutputFactory(class-name=org.tribuo.classification.LabelFactory),featuresPath=/Users/apocock/Development/Tribuo/tutorials/train-images-idx3-ubyte.gz,features-file-modified-time=2000-07-21T14:20:24-04:00,output-resource-hash=SHA-256[3552534A0A558BBED6AED32B30C495CCA23D567EC52CAC8BE1A0730E8010255C],datasource-creation-time=2021-10-26T17:51:22.314557-04:00,output-file-modified-time=2000-07-21T14:20:27-04:00,idx-feature-type=UBYTE,features-resource-hash=SHA-256[440FCABF73CC546FA21475E81EA370265605F56BE210A4024D2CA8F203523609],host-short-name=DataSource),transformations=[],is-sequence=false,is-dense=false,num-examples=60000,num-features=717,num-outputs=10,tribuo-version=4.2.0-SNAPSHOT),trainer=Trainer(class-name=org.tribuo.classification.sgd.fm.FMClassific
ationTrainer,seed=12345,variance=0.1,minibatchSize=1,factorizedDimSize=6,shuffle=true,epochs=5,optimiser=StochasticGradientOptimiser(class-name=org.tribuo.math.optimisers.AdaGrad,epsilon=0.1,initialLearningRate=0.1,initialValue=0.0,host-short-name=StochasticGradientOptimiser),loggingInterval=30000,objective=LabelObjective(class-name=org.tribuo.classification.sgd.objectives.LogMulticlass,host-short-name=LabelObjective),tribuo-version=4.2.0-SNAPSHOT,train-invocation-count=0,is-sequence=false,host-short-name=Trainer),trained-at=2021-10-26T17:51:35.432511-04:00,instance-values={},tribuo-version=4.2.0-SNAPSHOT,java-version=17-ea,os-name=Mac OS X,os-arch=x86_64)\n", + "\t\tmodel-description=factorization-machine-model - Model(class-name=org.tribuo.classification.sgd.fm.FMClassificationModel,dataset=Dataset(class-name=org.tribuo.MutableDataset,datasource=DataSource(class-name=org.tribuo.datasource.IDXDataSource,outputPath=/Users/apocock/Development/Tribuo/tutorials/train-labels-idx1-ubyte.gz,outputFactory=OutputFactory(class-name=org.tribuo.classification.LabelFactory),featuresPath=/Users/apocock/Development/Tribuo/tutorials/train-images-idx3-ubyte.gz,features-file-modified-time=2000-07-21T14:20:24-04:00,output-resource-hash=SHA-256[3552534A0A558BBED6AED32B30C495CCA23D567EC52CAC8BE1A0730E8010255C],datasource-creation-time=2021-12-18T20:36:23.109293-05:00,output-file-modified-time=2000-07-21T14:20:27-04:00,idx-feature-type=UBYTE,features-resource-hash=SHA-256[440FCABF73CC546FA21475E81EA370265605F56BE210A4024D2CA8F203523609],host-short-name=DataSource),transformations=[],is-sequence=false,is-dense=false,num-examples=60000,num-features=717,num-outputs=10,tribuo-version=4.2.0),trainer=Trainer(class-name=org.tribuo.classification.sgd.fm.FMClassificationTrainer,seed=12345,variance=0.1,minibatchSize=1,factorizedDimSize=6,shuffle=true,epochs=5,optimiser=StochasticGradientOptimiser(class-name=org.tribuo.math.optimisers.AdaGrad,epsilon=0.1,initialLearningRate=0.1,initialValue=0.0,ho
st-short-name=StochasticGradientOptimiser),loggingInterval=30000,objective=LabelObjective(class-name=org.tribuo.classification.sgd.objectives.LogMulticlass,host-short-name=LabelObjective),tribuo-version=4.2.0,train-invocation-count=0,is-sequence=false,host-short-name=Trainer),trained-at=2021-12-18T20:36:35.640663-05:00,instance-values={},tribuo-version=4.2.0,java-version=17.0.1,os-name=Mac OS X,os-arch=x86_64)\n", "\t\tmodel-producer=Tribuo\n", "\t\tmodel-version=0\n", "\t\tinput-name=input\n", "\t}\n", - "\ttribuo-version = 4.2.0-SNAPSHOT\n", - "\tjava-version = 17\n", + "\ttribuo-version = 4.2.0\n", + "\tjava-version = 17.0.1\n", "\tos-name = Mac OS X\n", "\tos-arch = x86_64\n", ")\n" @@ -501,7 +501,7 @@ "\t\t\t\t\tfeaturesPath = /Users/apocock/Development/Tribuo/tutorials/train-images-idx3-ubyte.gz\n", "\t\t\t\t\tfeatures-file-modified-time = 2000-07-21T14:20:24-04:00\n", "\t\t\t\t\toutput-resource-hash = 3552534A0A558BBED6AED32B30C495CCA23D567EC52CAC8BE1A0730E8010255C\n", - "\t\t\t\t\tdatasource-creation-time = 2021-10-26T17:51:22.314557-04:00\n", + "\t\t\t\t\tdatasource-creation-time = 2021-12-18T20:36:23.109293-05:00\n", "\t\t\t\t\toutput-file-modified-time = 2000-07-21T14:20:27-04:00\n", "\t\t\t\t\tidx-feature-type = UBYTE\n", "\t\t\t\t\tfeatures-resource-hash = 440FCABF73CC546FA21475E81EA370265605F56BE210A4024D2CA8F203523609\n", @@ -513,7 +513,7 @@ "\t\t\tnum-examples = 60000\n", "\t\t\tnum-features = 717\n", "\t\t\tnum-outputs = 10\n", - "\t\t\ttribuo-version = 4.2.0-SNAPSHOT\n", + "\t\t\ttribuo-version = 4.2.0\n", "\t\t)\n", "\ttrainer = FMClassificationTrainer(\n", "\t\t\tclass-name = org.tribuo.classification.sgd.fm.FMClassificationTrainer\n", @@ -535,15 +535,15 @@ "\t\t\t\t\tclass-name = org.tribuo.classification.sgd.objectives.LogMulticlass\n", "\t\t\t\t\thost-short-name = LabelObjective\n", "\t\t\t\t)\n", - "\t\t\ttribuo-version = 4.2.0-SNAPSHOT\n", + "\t\t\ttribuo-version = 4.2.0\n", "\t\t\ttrain-invocation-count = 0\n", "\t\t\tis-sequence = 
false\n", "\t\t\thost-short-name = Trainer\n", "\t\t)\n", - "\ttrained-at = 2021-10-26T17:51:35.432511-04:00\n", + "\ttrained-at = 2021-12-18T20:36:35.640663-05:00\n", "\tinstance-values = Map{}\n", - "\ttribuo-version = 4.2.0-SNAPSHOT\n", - "\tjava-version = 17-ea\n", + "\ttribuo-version = 4.2.0\n", + "\tjava-version = 17.0.1\n", "\tos-name = Mac OS X\n", "\tos-arch = x86_64\n", ")\n" @@ -593,18 +593,14 @@ " \"dataset\" : {\n", " \"datasource\" : {\n", " \"datasource-creation-time\" : {\n", - " \"original\" : \"2021-10-26T17:51:22.314557-04:00\",\n", - " \"reproduced\" : \"2021-11-03T09:51:56.746038-04:00\"\n", + " \"original\" : \"2021-12-18T20:36:23.109293-05:00\",\n", + " \"reproduced\" : \"2021-12-18T20:38:58.740193-05:00\"\n", " }\n", " }\n", " },\n", - " \"java-version\" : {\n", - " \"original\" : \"17-ea\",\n", - " \"reproduced\" : \"17\"\n", - " },\n", " \"trained-at\" : {\n", - " \"original\" : \"2021-10-26T17:51:35.432511-04:00\",\n", - " \"reproduced\" : \"2021-11-03T09:52:10.606727-04:00\"\n", + " \"original\" : \"2021-12-18T20:36:35.640663-05:00\",\n", + " \"reproduced\" : \"2021-12-18T20:39:09.831081-05:00\"\n", " }\n", "}\n" ] @@ -718,18 +714,14 @@ " \"dataset\" : {\n", " \"datasource\" : {\n", " \"datasource-creation-time\" : {\n", - " \"original\" : \"2021-10-26T17:51:22.314557-04:00\",\n", - " \"reproduced\" : \"2021-11-03T09:51:47.929133-04:00\"\n", + " \"original\" : \"2021-12-18T20:36:23.109293-05:00\",\n", + " \"reproduced\" : \"2021-12-18T20:38:51.027212-05:00\"\n", " }\n", " }\n", " },\n", - " \"java-version\" : {\n", - " \"original\" : \"17-ea\",\n", - " \"reproduced\" : \"17\"\n", - " },\n", " \"trained-at\" : {\n", - " \"original\" : \"2021-10-26T17:51:35.432511-04:00\",\n", - " \"reproduced\" : \"2021-11-03T09:52:20.359019-04:00\"\n", + " \"original\" : \"2021-12-18T20:36:35.640663-05:00\",\n", + " \"reproduced\" : \"2021-12-18T20:39:18.280345-05:00\"\n", " },\n", " \"trainer\" : {\n", " \"class-name\" : {\n", @@ -795,7 +787,7 @@ 
"mimetype": "text/x-java-source", "name": "Java", "pygments_lexer": "java", - "version": "17+35-LTS-2724" + "version": "17.0.1+12-LTS-39" } }, "nbformat": 4, diff --git a/tutorials/tensorflow-tribuo-v4.ipynb b/tutorials/tensorflow-tribuo-v4.ipynb index 7b80b21a6..2f3be8b36 100644 --- a/tutorials/tensorflow-tribuo-v4.ipynb +++ b/tutorials/tensorflow-tribuo-v4.ipynb @@ -41,7 +41,7 @@ "metadata": {}, "outputs": [], "source": [ - "%jars ./tribuo-tensorflow-4.2.0-SNAPSHOT-jar-with-dependencies.jar" + "%jars ./tribuo-tensorflow-4.2.0-jar-with-dependencies.jar" ] }, { @@ -274,7 +274,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Wine quality training took (00:00:02:519)\n" + "Wine quality training took (00:00:01:891)\n" ] } ], @@ -302,9 +302,9 @@ "output_type": "stream", "text": [ "Wine quality evaluation:\n", - " RMSE 0.651519\n", - " MAE 0.509955\n", - " R^2 0.347267\n", + " RMSE 0.651441\n", + " MAE 0.510348\n", + " R^2 0.347424\n", "\n" ] } @@ -394,7 +394,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "MNIST MLP training took (00:01:00:660)\n" + "MNIST MLP training took (00:01:06:256)\n" ] } ], @@ -422,32 +422,32 @@ "output_type": "stream", "text": [ "Class n tp fn fp recall prec f1\n", - "0 980 0 980 0 0.000 0.000 0.000\n", - "1 1,135 1,135 0 8,865 1.000 0.114 0.204\n", - "2 1,032 0 1,032 0 0.000 0.000 0.000\n", - "3 1,010 0 1,010 0 0.000 0.000 0.000\n", - "4 982 0 982 0 0.000 0.000 0.000\n", - "5 892 0 892 0 0.000 0.000 0.000\n", - "6 958 0 958 0 0.000 0.000 0.000\n", - "7 1,028 0 1,028 0 0.000 0.000 0.000\n", - "8 974 0 974 0 0.000 0.000 0.000\n", - "9 1,009 0 1,009 0 0.000 0.000 0.000\n", - "Total 10,000 1,135 8,865 8,865\n", - "Accuracy 0.114\n", - "Micro Average 0.114 0.114 0.114\n", - "Macro Average 0.100 0.011 0.020\n", - "Balanced Error Rate 0.900\n", + "0 980 960 20 94 0.980 0.911 0.944\n", + "1 1,135 1,105 30 23 0.974 0.980 0.977\n", + "2 1,032 922 110 39 0.893 0.959 0.925\n", + "3 1,010 916 94 121 0.907 0.883 0.895\n", + "4 982 
930 52 61 0.947 0.938 0.943\n", + "5 892 813 79 74 0.911 0.917 0.914\n", + "6 958 914 44 36 0.954 0.962 0.958\n", + "7 1,028 942 86 37 0.916 0.962 0.939\n", + "8 974 893 81 107 0.917 0.893 0.905\n", + "9 1,009 926 83 87 0.918 0.914 0.916\n", + "Total 10,000 9,321 679 679\n", + "Accuracy 0.932\n", + "Micro Average 0.932 0.932 0.932\n", + "Macro Average 0.932 0.932 0.931\n", + "Balanced Error Rate 0.068\n", " 0 1 2 3 4 5 6 7 8 9\n", - "0 0 980 0 0 0 0 0 0 0 0\n", - "1 0 1,135 0 0 0 0 0 0 0 0\n", - "2 0 1,032 0 0 0 0 0 0 0 0\n", - "3 0 1,010 0 0 0 0 0 0 0 0\n", - "4 0 982 0 0 0 0 0 0 0 0\n", - "5 0 892 0 0 0 0 0 0 0 0\n", - "6 0 958 0 0 0 0 0 0 0 0\n", - "7 0 1,028 0 0 0 0 0 0 0 0\n", - "8 0 974 0 0 0 0 0 0 0 0\n", - "9 0 1,009 0 0 0 0 0 0 0 0\n", + "0 960 0 1 1 4 3 5 1 2 3\n", + "1 1 1,105 4 4 0 3 3 1 14 0\n", + "2 14 5 922 42 12 2 7 4 22 2\n", + "3 13 0 9 916 1 34 0 9 22 6\n", + "4 3 0 3 0 930 0 11 3 5 27\n", + "5 18 0 0 17 5 813 6 4 23 6\n", + "6 11 4 1 0 5 16 914 0 6 1\n", + "7 10 7 15 8 4 1 0 942 4 37\n", + "8 12 2 6 28 10 12 4 2 893 5\n", + "9 12 5 0 21 20 3 0 13 9 926\n", "\n" ] } @@ -497,7 +497,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "MNIST CNN training took (00:03:09:806)\n" + "MNIST CNN training took (00:02:57:331)\n" ] } ], @@ -538,32 +538,32 @@ "output_type": "stream", "text": [ "Class n tp fn fp recall prec f1\n", - "0 980 973 7 11 0.993 0.989 0.991\n", - "1 1,135 1,129 6 14 0.995 0.988 0.991\n", - "2 1,032 1,024 8 31 0.992 0.971 0.981\n", - "3 1,010 989 21 17 0.979 0.983 0.981\n", - "4 982 965 17 14 0.983 0.986 0.984\n", - "5 892 865 27 22 0.970 0.975 0.972\n", - "6 958 936 22 2 0.977 0.998 0.987\n", - "7 1,028 1,003 25 13 0.976 0.987 0.981\n", - "8 974 948 26 15 0.973 0.984 0.979\n", - "9 1,009 997 12 32 0.988 0.969 0.978\n", - "Total 10,000 9,829 171 171\n", - "Accuracy 0.983\n", - "Micro Average 0.983 0.983 0.983\n", - "Macro Average 0.983 0.983 0.983\n", - "Balanced Error Rate 0.017\n", + "0 980 968 12 25 0.988 0.975 0.981\n", + 
"1 1,135 1,123 12 5 0.989 0.996 0.992\n", + "2 1,032 1,013 19 39 0.982 0.963 0.972\n", + "3 1,010 980 30 12 0.970 0.988 0.979\n", + "4 982 963 19 19 0.981 0.981 0.981\n", + "5 892 873 19 21 0.979 0.977 0.978\n", + "6 958 938 20 17 0.979 0.982 0.981\n", + "7 1,028 998 30 14 0.971 0.986 0.978\n", + "8 974 937 37 31 0.962 0.968 0.965\n", + "9 1,009 988 21 36 0.979 0.965 0.972\n", + "Total 10,000 9,781 219 219\n", + "Accuracy 0.978\n", + "Micro Average 0.978 0.978 0.978\n", + "Macro Average 0.978 0.978 0.978\n", + "Balanced Error Rate 0.022\n", " 0 1 2 3 4 5 6 7 8 9\n", - "0 973 2 0 0 0 0 0 2 3 0\n", - "1 0 1,129 2 1 1 0 0 0 2 0\n", - "2 1 3 1,024 0 0 0 0 2 2 0\n", - "3 0 0 4 989 0 9 0 4 1 3\n", - "4 0 1 1 0 965 0 1 0 0 14\n", - "5 1 2 1 10 1 865 1 1 5 5\n", - "6 6 4 3 1 1 7 936 0 0 0\n", - "7 0 2 14 0 1 0 0 1,003 1 7\n", - "8 3 0 6 4 3 5 0 2 948 3\n", - "9 0 0 0 1 7 1 0 2 1 997\n", + "0 968 0 0 0 0 0 6 0 5 1\n", + "1 0 1,123 1 3 0 2 1 0 5 0\n", + "2 3 1 1,013 2 2 0 3 2 5 1\n", + "3 1 0 9 980 0 9 0 4 2 5\n", + "4 1 0 1 0 963 0 4 1 3 9\n", + "5 1 2 0 6 0 873 1 1 3 5\n", + "6 10 1 1 1 2 2 938 0 3 0\n", + "7 0 1 13 0 4 0 0 998 4 8\n", + "8 7 0 13 0 0 7 2 1 937 7\n", + "9 2 0 1 0 11 1 0 5 1 988\n", "\n" ] } @@ -719,7 +719,7 @@ "mimetype": "text/x-java-source", "name": "Java", "pygments_lexer": "java", - "version": "17+35-LTS-2724" + "version": "17.0.1+12-LTS-39" } }, "nbformat": 4,