From a899b90309373b742e66e275ddd19d219eb7b4bc Mon Sep 17 00:00:00 2001 From: Adam Pocock Date: Mon, 24 May 2021 15:18:09 -0400 Subject: [PATCH] Updated tutorials for 4.1 release. --- tutorials/anomaly-tribuo-v4.ipynb | 6 +- tutorials/clustering-tribuo-v4.ipynb | 20 +-- tutorials/columnar-tribuo-v4.ipynb | 8 +- tutorials/configuration-tribuo-v4.ipynb | 41 +++--- .../document-classification-tribuo-v4.ipynb | 22 ++-- tutorials/external-models-tribuo-v4.ipynb | 6 +- tutorials/irises-tribuo-v4.ipynb | 107 ++++++++++------ tutorials/regression-tribuo-v4.ipynb | 16 +-- tutorials/tensorflow-tribuo-v4.ipynb | 118 +++++++++--------- 9 files changed, 193 insertions(+), 151 deletions(-) diff --git a/tutorials/anomaly-tribuo-v4.ipynb b/tutorials/anomaly-tribuo-v4.ipynb index bd8b68ef2..d9c733174 100644 --- a/tutorials/anomaly-tribuo-v4.ipynb +++ b/tutorials/anomaly-tribuo-v4.ipynb @@ -19,7 +19,7 @@ "metadata": {}, "outputs": [], "source": [ - "%jars ./tribuo-anomaly-libsvm-4.1.0-SNAPSHOT-jar-with-dependencies.jar" + "%jars ./tribuo-anomaly-libsvm-4.1.0-jar-with-dependencies.jar" ] }, { @@ -110,7 +110,7 @@ "obj = 293.8182352369252, rho = 3.201748862633537\n", "nSV = 301, nBSV = 120\n", "\n", - "Training took (00:00:00:115)\n" + "Training took (00:00:00:149)\n" ] } ], @@ -209,7 +209,7 @@ "mimetype": "text/x-java-source", "name": "Java", "pygments_lexer": "java", - "version": "16+14" + "version": "17-ea+22-1964" } }, "nbformat": 4, diff --git a/tutorials/clustering-tribuo-v4.ipynb b/tutorials/clustering-tribuo-v4.ipynb index 12ea48594..9f6f56aac 100644 --- a/tutorials/clustering-tribuo-v4.ipynb +++ b/tutorials/clustering-tribuo-v4.ipynb @@ -19,7 +19,7 @@ "metadata": {}, "outputs": [], "source": [ - "%jars ./tribuo-clustering-kmeans-4.1.0-SNAPSHOT-jar-with-dependencies.jar" + "%jars ./tribuo-clustering-kmeans-4.1.0-jar-with-dependencies.jar" ] }, { @@ -98,7 +98,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training with 5 clusters took (00:00:00:049)\n" + "Training with 5 clusters took (00:00:00:102)\n" ] } ], @@ -175,7 +175,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training with 5 clusters took (00:00:00:042)\n" + "Training with 5 clusters took (00:00:00:074)\n" ] } ], @@ -277,8 +277,8 @@ "data": { "text/plain": [ "Clustering Evaluation\n", - "Normalized MI = 0.8154291916732409\n", - "Adjusted MI = 0.8139169342020223" + "Normalized MI = 0.8154291916732408\n", + "Adjusted MI = 0.8139169342020222" ] }, "execution_count": 10, @@ -347,7 +347,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training with 5 clusters on 4 threads took (00:00:00:038)\n" + "Training with 5 clusters on 4 threads took (00:00:00:062)\n" ] } ], @@ -376,7 +376,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training with 20 clusters on 4 threads took (00:00:00:038)\n" + "Training with 20 clusters on 4 threads took (00:00:00:080)\n" ] } ], @@ -404,8 +404,8 @@ "data": { "text/plain": [ "Clustering Evaluation\n", - "Normalized MI = 0.8104463467727059\n", - "Adjusted MI = 0.8088941747451209" + "Normalized MI = 0.8104463467727057\n", + "Adjusted MI = 0.8088941747451207" ] }, "execution_count": 14, @@ -478,7 +478,7 @@ "mimetype": "text/x-java-source", "name": "Java", "pygments_lexer": "java", - "version": "16+14" + "version": "17-ea+22-1964" } }, "nbformat": 4, diff --git a/tutorials/columnar-tribuo-v4.ipynb b/tutorials/columnar-tribuo-v4.ipynb index 55e27851a..36a27982e 100644 --- a/tutorials/columnar-tribuo-v4.ipynb +++ b/tutorials/columnar-tribuo-v4.ipynb @@ -38,8 +38,8 @@ "metadata": {}, "outputs": [], "source": [ - "%jars ./tribuo-classification-experiments-4.1.0-SNAPSHOT-jar-with-dependencies.jar\n", - "%jars ./tribuo-json-4.1.0-SNAPSHOT-jar-with-dependencies.jar" + "%jars ./tribuo-classification-experiments-4.1.0-jar-with-dependencies.jar\n", + "%jars ./tribuo-json-4.1.0-jar-with-dependencies.jar" ] }, { @@ -527,7 +527,7 @@ { "data": { "text/plain": [ - "Prediction(maxLabel=(BAD,0.9679724514693198),outputScores={BAD=(BAD,0.9679724514693198)GOOD=(GOOD,0.03202754853068015})" + "Prediction(maxLabel=(BAD,0.96797245146932),outputScores={BAD=(BAD,0.96797245146932),GOOD=(GOOD,0.032027548530680135)})" ] }, "execution_count": 20, @@ -568,7 +568,7 @@ "mimetype": "text/x-java-source", "name": "Java", "pygments_lexer": "java", - "version": "16+14" + "version": "17-ea+22-1964" } }, "nbformat": 4, diff --git a/tutorials/configuration-tribuo-v4.ipynb b/tutorials/configuration-tribuo-v4.ipynb index 7ea105a9a..47872ec4c 100644 --- a/tutorials/configuration-tribuo-v4.ipynb +++ b/tutorials/configuration-tribuo-v4.ipynb @@ -32,8 +32,8 @@ "metadata": {}, "outputs": [], "source": [ - "%jars ./tribuo-classification-experiments-4.1.0-SNAPSHOT-jar-with-dependencies.jar\n", - "%jars ./tribuo-json-4.1.0-SNAPSHOT-jar-with-dependencies.jar" + "%jars ./tribuo-classification-experiments-4.1.0-jar-with-dependencies.jar\n", + "%jars ./tribuo-json-4.1.0-jar-with-dependencies.jar" ] }, { @@ -404,7 +404,7 @@ { "data": { "text/plain": [ - "XGBoostTrainer(numTrees=10,parameters{colsample_bytree=1.0, silent=1, seed=1, max_depth=4, booster=gbtree, objective=multi:softprob, lambda=1.0, eta=0.5, nthread=6, alpha=1.0, subsample=1.0, gamma=0.1, min_child_weight=1.0})" + "XGBoostTrainer(numTrees=10,parameters{colsample_bytree=1.0, tree_method=auto, seed=1, max_depth=4, booster=gbtree, objective=multi:softprob, lambda=1.0, eta=0.5, nthread=6, alpha=1.0, subsample=1.0, gamma=0.1, min_child_weight=1.0, verbosity=0})" ] }, "execution_count": 11, @@ -487,7 +487,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training logistic regression took (00:00:05:071)\n" + "Training logistic regression took (00:00:03:669)\n" ] } ], @@ -569,8 +569,8 @@ " \"shuffle\" : \"true\",\n", " \"epochs\" : \"2\",\n", " \"optimiser\" : \"adagrad-2\",\n", - " \"objective\" : \"logmulticlass-3\",\n", - " \"loggingInterval\" : \"10000\"\n", + " \"loggingInterval\" : \"10000\",\n", + " \"objective\" : \"logmulticlass-3\"\n", " }\n", " }, {\n", " \"name\" : \"adagrad-2\",\n", @@ -805,7 +805,7 @@ "\t\t\t\t\t\t\tfeaturesPath = /Users/apocock/Development/Tribuo/tutorials/train-images-idx3-ubyte.gz\n", "\t\t\t\t\t\t\tfeatures-file-modified-time = 2000-07-21T14:20:24-04:00\n", "\t\t\t\t\t\t\toutput-resource-hash = 3552534A0A558BBED6AED32B30C495CCA23D567EC52CAC8BE1A0730E8010255C\n", - "\t\t\t\t\t\t\tdatasource-creation-time = 2020-11-02T16:58:33.387902-05:00\n", + "\t\t\t\t\t\t\tdatasource-creation-time = 2021-05-24T12:24:14.958637-04:00\n", "\t\t\t\t\t\t\toutput-file-modified-time = 2000-07-21T14:20:27-04:00\n", "\t\t\t\t\t\t\tidx-feature-type = UBYTE\n", "\t\t\t\t\t\t\tfeatures-resource-hash = 440FCABF73CC546FA21475E81EA370265605F56BE210A4024D2CA8F203523609\n", @@ -817,7 +817,7 @@ "\t\t\t\t\tnum-examples = 60000\n", "\t\t\t\t\tnum-features = 717\n", "\t\t\t\t\tnum-outputs = 10\n", - "\t\t\t\t\ttribuo-version = 4.1.0-SNAPSHOT\n", + "\t\t\t\t\ttribuo-version = 4.1.0\n", "\t\t\t\t)\n", "\t\t\ttrainer = LinearSGDTrainer(\n", "\t\t\t\t\tclass-name = org.tribuo.classification.sgd.linear.LinearSGDTrainer\n", @@ -832,20 +832,24 @@ "\t\t\t\t\t\t\tinitialValue = 0.0\n", "\t\t\t\t\t\t\thost-short-name = StochasticGradientOptimiser\n", "\t\t\t\t\t\t)\n", + "\t\t\t\t\tloggingInterval = 10000\n", "\t\t\t\t\tobjective = LogMulticlass(\n", "\t\t\t\t\t\t\tclass-name = org.tribuo.classification.sgd.objectives.LogMulticlass\n", "\t\t\t\t\t\t\thost-short-name = LabelObjective\n", "\t\t\t\t\t\t)\n", - "\t\t\t\t\tloggingInterval = 10000\n", + "\t\t\t\t\ttribuo-version = 4.1.0\n", "\t\t\t\t\ttrain-invocation-count = 0\n", "\t\t\t\t\tis-sequence = false\n", "\t\t\t\t\thost-short-name = Trainer\n", "\t\t\t\t)\n", - "\t\t\ttrained-at = 2020-11-02T16:58:40.405866-05:00\n", + "\t\t\ttrained-at = 2021-05-24T12:24:19.604718-04:00\n", "\t\t\tinstance-values = Map{\n", "\t\t\t\treconfigured-model=true\n", "\t\t\t}\n", - "\t\t\ttribuo-version = 4.1.0-SNAPSHOT\n", + "\t\t\ttribuo-version = 4.1.0\n", + "\t\t\tjava-version = 17-ea\n", + "\t\t\tos-name = Mac OS X\n", + "\t\t\tos-arch = x86_64\n", "\t\t)\n", "\tdataset-provenance = MutableDataset(\n", "\t\t\tclass-name = org.tribuo.MutableDataset\n", @@ -858,7 +862,7 @@ "\t\t\t\t\tfeaturesPath = /Users/apocock/Development/Tribuo/tutorials/t10k-images-idx3-ubyte.gz\n", "\t\t\t\t\tfeatures-file-modified-time = 2000-07-21T14:19:56-04:00\n", "\t\t\t\t\toutput-resource-hash = F7AE60F92E00EC6DEBD23A6088C31DBD2371ECA3FFA0DEFAEFB259924204AEC6\n", - "\t\t\t\t\tdatasource-creation-time = 2020-11-02T16:58:20.304804-05:00\n", + "\t\t\t\t\tdatasource-creation-time = 2021-05-24T12:24:03.396403-04:00\n", "\t\t\t\t\toutput-file-modified-time = 2000-07-21T14:20:05-04:00\n", "\t\t\t\t\tidx-feature-type = UBYTE\n", "\t\t\t\t\tfeatures-resource-hash = 8D422C7B0A1C1C79245A5BCF07FE86E33EEAFEE792B84584AEC276F5A2DBC4E6\n", @@ -870,9 +874,9 @@ "\t\t\tnum-examples = 10000\n", "\t\t\tnum-features = 668\n", "\t\t\tnum-outputs = 10\n", - "\t\t\ttribuo-version = 4.1.0-SNAPSHOT\n", + "\t\t\ttribuo-version = 4.1.0\n", "\t\t)\n", - "\ttribuo-version = 4.1.0-SNAPSHOT\n", + "\ttribuo-version = 4.1.0\n", ")\n" ] } @@ -900,7 +904,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training transformed logistic regression took (00:00:09:194)\n" + "Training transformed logistic regression took (00:00:05:252)\n" ] } ], @@ -1018,8 +1022,8 @@ " \"shuffle\" : \"true\",\n", " \"epochs\" : \"2\",\n", " \"optimiser\" : \"adagrad-5\",\n", - " \"objective\" : \"logmulticlass-6\",\n", - " \"loggingInterval\" : \"10000\"\n", + " \"loggingInterval\" : \"10000\",\n", + " \"objective\" : \"logmulticlass-6\"\n", " }\n", " }, {\n", " \"name\" : \"transformtrainer-0\",\n", @@ -1027,6 +1031,7 @@ " \"export\" : \"false\",\n", " \"import\" : \"false\",\n", " \"properties\" : {\n", + " \"includeImplicitZeroFeatures\" : \"false\",\n", " \"transformations\" : \"transformationmap-1\",\n", " \"densify\" : \"false\",\n", " \"innerTrainer\" : \"linearsgdtrainer-2\"\n", @@ -1106,7 +1111,7 @@ "mimetype": "text/x-java-source", "name": "Java", "pygments_lexer": "java", - "version": "16+14" + "version": "17-ea+22-1964" } }, "nbformat": 4, diff --git a/tutorials/document-classification-tribuo-v4.ipynb b/tutorials/document-classification-tribuo-v4.ipynb index 0af4c12f9..0c56c915b 100644 --- a/tutorials/document-classification-tribuo-v4.ipynb +++ b/tutorials/document-classification-tribuo-v4.ipynb @@ -49,8 +49,8 @@ "metadata": {}, "outputs": [], "source": [ - "%jars ./tribuo-classification-experiments-4.1.0-SNAPSHOT-jar-with-dependencies.jar\n", - "%jars ./tribuo-onnx-4.1.0-SNAPSHOT-jar-with-dependencies.jar" + "%jars ./tribuo-classification-experiments-4.1.0-jar-with-dependencies.jar\n", + "%jars ./tribuo-onnx-4.1.0-jar-with-dependencies.jar" ] }, { @@ -198,7 +198,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training the model on BoW features took (00:00:09:366)\n", + "Training the model on BoW features took (00:00:09:659)\n", "\n", "Class n tp fn fp recall prec f1\n", "soc.religion.christian 398 352 46 110 0.884 0.762 0.819\n", @@ -291,7 +291,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training the model on Unigram features took (00:00:09:556)\n", + "Training the model on Unigram features took (00:00:10:529)\n", "\n", "Class n tp fn fp recall prec f1\n", "soc.religion.christian 398 362 36 88 0.910 0.804 0.854\n", @@ -381,7 +381,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training the model on Bigram features took (00:00:46:003)\n", + "Training the model on Bigram features took (00:00:41:981)\n", "\n", "Class n tp fn fp recall prec f1\n", "soc.religion.christian 398 331 67 57 0.832 0.853 0.842\n", @@ -480,7 +480,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training the model on TF-IDF features took (00:00:41:742)\n", + "Training the model on TF-IDF features took (00:00:42:471)\n", "\n", "Class n tp fn fp recall prec f1\n", "soc.religion.christian 398 350 48 183 0.879 0.657 0.752\n", @@ -570,7 +570,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training the model on hashed features took (00:00:23:394)\n", + "Training the model on hashed features took (00:00:24:289)\n", "\n", "Class n tp fn fp recall prec f1\n", "soc.religion.christian 398 306 92 125 0.769 0.710 0.738\n", @@ -662,7 +662,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training the model on trimmed TF-IDF features took (00:00:19:872)\n", + "Training the model on trimmed TF-IDF features took (00:00:19:049)\n", "\n", "Class n tp fn fp recall prec f1\n", "soc.religion.christian 398 337 61 93 0.847 0.784 0.814\n", @@ -750,7 +750,7 @@ "text": [ "bert training data size = 11314, number of features = 768, number of classes = 20\n", "bert testing data size = 7532, number of features = 768, number of classes = 20\n", - "Extracting features with BERT took (00:59:31:085)\n" + "Extracting features with BERT took (01:06:52:756)\n" ] } ], @@ -789,7 +789,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training a LR on BERT features took (00:00:06:870)\n", + "Training a LR on BERT features took (00:00:08:960)\n", "Class n tp fn fp recall prec f1\n", "soc.religion.christian 398 353 45 111 0.887 0.761 0.819\n", "rec.autos 396 332 64 99 0.838 0.770 0.803\n", @@ -880,7 +880,7 @@ "\t\t\tclass-name = org.tribuo.classification.LabelFactory\n", "\t\t)\n", "\tfile-modified-time = 2003-03-18T07:24:55-05:00\n", - "\tdatasource-creation-time = 2021-05-21T10:33:44.926193-04:00\n", + "\tdatasource-creation-time = 2021-05-24T12:46:58.801385-04:00\n", ")\n" ] } diff --git a/tutorials/external-models-tribuo-v4.ipynb b/tutorials/external-models-tribuo-v4.ipynb index 45bface96..7efb71265 100644 --- a/tutorials/external-models-tribuo-v4.ipynb +++ b/tutorials/external-models-tribuo-v4.ipynb @@ -24,8 +24,8 @@ "metadata": {}, "outputs": [], "source": [ - "%jars tribuo-classification-experiments-4.1.0-SNAPSHOT-jar-with-dependencies.jar\n", - "%jars tribuo-onnx-4.1.0-SNAPSHOT-jar-with-dependencies.jar" + "%jars tribuo-classification-experiments-4.1.0-jar-with-dependencies.jar\n", + "%jars tribuo-onnx-4.1.0-jar-with-dependencies.jar" ] }, { @@ -469,7 +469,7 @@ "mimetype": "text/x-java-source", "name": "Java", "pygments_lexer": "java", - "version": "16+14" + "version": "17-ea+22-1964" } }, "nbformat": 4, diff --git a/tutorials/irises-tribuo-v4.ipynb b/tutorials/irises-tribuo-v4.ipynb index bfd2af870..2b25514e7 100644 --- a/tutorials/irises-tribuo-v4.ipynb +++ b/tutorials/irises-tribuo-v4.ipynb @@ -27,8 +27,8 @@ "metadata": {}, "outputs": [], "source": [ - "%jars ./tribuo-classification-experiments-4.1.0-SNAPSHOT-jar-with-dependencies.jar\n", - "%jars ./tribuo-json-4.1.0-SNAPSHOT-jar-with-dependencies.jar" + "%jars ./tribuo-classification-experiments-4.1.0-jar-with-dependencies.jar\n", + "%jars ./tribuo-json-4.1.0-jar-with-dependencies.jar" ] }, { @@ -392,11 +392,12 @@ "\t\t\tinitialValue = 0.0\n", "\t\t\thost-short-name = StochasticGradientOptimiser\n", "\t\t)\n", + "\tloggingInterval = 1000\n", "\tobjective = LogMulticlass(\n", "\t\t\tclass-name = org.tribuo.classification.sgd.objectives.LogMulticlass\n", "\t\t\thost-short-name = LabelObjective\n", "\t\t)\n", - "\tloggingInterval = 1000\n", + "\ttribuo-version = 4.1.0\n", "\ttrain-invocation-count = 0\n", "\tis-sequence = false\n", "\thost-short-name = Trainer\n", @@ -457,7 +458,15 @@ " \"tribuo-version\" : {\n", " \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n", " \"key\" : \"tribuo-version\",\n", - " \"value\" : \"4.1.0-SNAPSHOT\",\n", + " \"value\" : \"4.1.0\",\n", + " \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n", + " \"additional\" : \"\",\n", + " \"is-reference\" : false\n", + " },\n", + " \"java-version\" : {\n", + " \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n", + " \"key\" : \"java-version\",\n", + " \"value\" : \"17-ea\",\n", " \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n", " \"additional\" : \"\",\n", " \"is-reference\" : false\n", @@ -470,14 +479,30 @@ " \"additional\" : \"\",\n", " \"is-reference\" : true\n", " },\n", + " \"os-arch\" : {\n", + " \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n", + " \"key\" : \"os-arch\",\n", + " \"value\" : \"x86_64\",\n", + " \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n", + " \"additional\" : \"\",\n", + " \"is-reference\" : false\n", + " },\n", " \"trained-at\" : {\n", " \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n", " \"key\" : \"trained-at\",\n", - " \"value\" : \"2020-09-29T18:03:58.789235-04:00\",\n", + " \"value\" : \"2021-05-24T12:27:10.387150-04:00\",\n", " \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.DateTimeProvenance\",\n", " \"additional\" : \"\",\n", " \"is-reference\" : false\n", " },\n", + " \"os-name\" : {\n", + " \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n", + " \"key\" : \"os-name\",\n", + " \"value\" : \"Mac OS X\",\n", + " \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n", + " \"additional\" : \"\",\n", + " \"is-reference\" : false\n", + " },\n", " \"dataset\" : {\n", " \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n", " \"key\" : \"dataset\",\n", @@ -528,7 +553,7 @@ " \"tribuo-version\" : {\n", " \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n", " \"key\" : \"tribuo-version\",\n", - " \"value\" : \"4.1.0-SNAPSHOT\",\n", + " \"value\" : \"4.1.0\",\n", " \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n", " \"additional\" : \"\",\n", " \"is-reference\" : false\n", @@ -556,7 +581,7 @@ " \"is-dense\" : {\n", " \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n", " \"key\" : \"is-dense\",\n", - " \"value\" : \"false\",\n", + " \"value\" : \"true\",\n", " \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.BooleanProvenance\",\n", " \"additional\" : \"\",\n", " \"is-reference\" : false\n", @@ -584,6 +609,14 @@ " \"additional\" : \"\",\n", " \"is-reference\" : false\n", " },\n", + " \"tribuo-version\" : {\n", + " \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n", + " \"key\" : \"tribuo-version\",\n", + " \"value\" : \"4.1.0\",\n", + " \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n", + " \"additional\" : \"\",\n", + " \"is-reference\" : false\n", + " },\n", " \"minibatchSize\" : {\n", " \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n", " \"key\" : \"minibatchSize\",\n", @@ -648,14 +681,6 @@ " \"additional\" : \"\",\n", " \"is-reference\" : false\n", " },\n", - " \"objective\" : {\n", - " \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n", - " \"key\" : \"objective\",\n", - " \"value\" : \"logmulticlass-5\",\n", - " \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl\",\n", - " \"additional\" : \"\",\n", - " \"is-reference\" : true\n", - " },\n", " \"loggingInterval\" : {\n", " \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n", " \"key\" : \"loggingInterval\",\n", @@ -663,6 +688,14 @@ " \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.IntProvenance\",\n", " \"additional\" : \"\",\n", " \"is-reference\" : false\n", + " },\n", + " \"objective\" : {\n", + " \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n", + " \"key\" : \"objective\",\n", + " \"value\" : \"logmulticlass-5\",\n", + " \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl\",\n", + " \"additional\" : \"\",\n", + " \"is-reference\" : true\n", " }\n", " }\n", "}, {\n", @@ -798,7 +831,13 @@ " \"map\" : {\n", " \"resource-hash\" : {\n", " \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n", - " \"key\" : \"resource-hash\",\n", + " \"key\" : \"resource-hash\",\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ " \"value\" : \"0FED2A99DB77EC533A62DC66894D3EC6DF3B58B6A8F3CF4A6B47E4086B7F97DC\",\n", " \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.HashProvenance\",\n", " \"additional\" : \"SHA256\",\n", @@ -828,13 +867,7 @@ " \"additional\" : \"\",\n", " \"is-reference\" : false\n", " },\n", - " \"response-name\" : {\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + " \"response-name\" : {\n", " \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n", " \"key\" : \"response-name\",\n", " \"value\" : \"species\",\n", @@ -907,7 +940,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "linear-sgd-model - Model(class-name=org.tribuo.classification.sgd.linear.LinearSGDModel,dataset=Dataset(class-name=org.tribuo.MutableDataset,datasource=SplitDataSourceProvenance(className=org.tribuo.evaluation.TrainTestSplitter,innerSourceProvenance=CSV(class-name=org.tribuo.data.csv.CSVLoader,outputFactory=OutputFactory(class-name=org.tribuo.classification.LabelFactory),response-name=species,separator=,,quote=\",path=file:/Users/apocock/Development/Tribuo/tutorials/bezdekIris.data,file-modified-time=1999-12-14T15:12:39-05:00,resource-hash=SHA-256[0FED2A99DB77EC533A62DC66894D3EC6DF3B58B6A8F3CF4A6B47E4086B7F97DC]),trainProportion=0.7,seed=1,size=150,isTrain=true),transformations=[],is-sequence=false,is-dense=false,num-examples=105,num-features=4,num-outputs=3,tribuo-version=4.1.0-SNAPSHOT),trainer=Trainer(class-name=org.tribuo.classification.sgd.linear.LogisticRegressionTrainer,seed=12345,minibatchSize=1,shuffle=true,epochs=5,optimiser=StochasticGradientOptimiser(class-name=org.tribuo.math.optimisers.AdaGrad,epsilon=0.1,initialLearningRate=1.0,initialValue=0.0,host-short-name=StochasticGradientOptimiser),objective=LabelObjective(class-name=org.tribuo.classification.sgd.objectives.LogMulticlass,host-short-name=LabelObjective),loggingInterval=1000,train-invocation-count=0,is-sequence=false,host-short-name=Trainer),trained-at=2020-09-29T18:03:58.789235-04:00,instance-values={},tribuo-version=4.1.0-SNAPSHOT)\n" + "linear-sgd-model - Model(class-name=org.tribuo.classification.sgd.linear.LinearSGDModel,dataset=Dataset(class-name=org.tribuo.MutableDataset,datasource=SplitDataSourceProvenance(className=org.tribuo.evaluation.TrainTestSplitter,innerSourceProvenance=CSV(class-name=org.tribuo.data.csv.CSVLoader,outputFactory=OutputFactory(class-name=org.tribuo.classification.LabelFactory),response-name=species,separator=,,quote=\",path=file:/Users/apocock/Development/Tribuo/tutorials/bezdekIris.data,file-modified-time=1999-12-14T15:12:39-05:00,resource-hash=SHA-256[0FED2A99DB77EC533A62DC66894D3EC6DF3B58B6A8F3CF4A6B47E4086B7F97DC]),trainProportion=0.7,seed=1,size=150,isTrain=true),transformations=[],is-sequence=false,is-dense=true,num-examples=105,num-features=4,num-outputs=3,tribuo-version=4.1.0),trainer=Trainer(class-name=org.tribuo.classification.sgd.linear.LogisticRegressionTrainer,seed=12345,minibatchSize=1,shuffle=true,epochs=5,optimiser=StochasticGradientOptimiser(class-name=org.tribuo.math.optimisers.AdaGrad,epsilon=0.1,initialLearningRate=1.0,initialValue=0.0,host-short-name=StochasticGradientOptimiser),loggingInterval=1000,objective=LabelObjective(class-name=org.tribuo.classification.sgd.objectives.LogMulticlass,host-short-name=LabelObjective),tribuo-version=4.1.0,train-invocation-count=0,is-sequence=false,host-short-name=Trainer),trained-at=2021-05-24T12:27:10.387150-04:00,instance-values={},tribuo-version=4.1.0,java-version=17-ea,os-name=Mac OS X,os-arch=x86_64)\n" ] } ], @@ -932,12 +965,12 @@ "output_type": "stream", "text": [ "{\n", - " \"tribuo-version\" : \"4.1.0-SNAPSHOT\",\n", + " \"tribuo-version\" : \"4.1.0\",\n", " \"dataset-provenance\" : {\n", " \"num-features\" : \"4\",\n", " \"num-examples\" : \"45\",\n", " \"num-outputs\" : \"3\",\n", - " \"tribuo-version\" : \"4.1.0-SNAPSHOT\",\n", + " \"tribuo-version\" : \"4.1.0\",\n", " \"datasource\" : {\n", " \"train-proportion\" : \"0.7\",\n", " \"seed\" : \"1\",\n", @@ -959,15 +992,17 @@ " },\n", " \"transformations\" : [ ],\n", " \"is-sequence\" : \"false\",\n", - " \"is-dense\" : \"false\",\n", + " \"is-dense\" : \"true\",\n", " \"class-name\" : \"org.tribuo.MutableDataset\"\n", " },\n", " \"class-name\" : \"org.tribuo.provenance.EvaluationProvenance\",\n", " \"model-provenance\" : {\n", " \"instance-values\" : { },\n", - " \"tribuo-version\" : \"4.1.0-SNAPSHOT\",\n", + " \"tribuo-version\" : \"4.1.0\",\n", + " \"java-version\" : \"17-ea\",\n", " \"trainer\" : {\n", " \"seed\" : \"12345\",\n", + " \"tribuo-version\" : \"4.1.0\",\n", " \"minibatchSize\" : \"1\",\n", " \"train-invocation-count\" : \"0\",\n", " \"is-sequence\" : \"false\",\n", @@ -982,18 +1017,20 @@ " },\n", " \"host-short-name\" : \"Trainer\",\n", " \"class-name\" : \"org.tribuo.classification.sgd.linear.LogisticRegressionTrainer\",\n", + " \"loggingInterval\" : \"1000\",\n", " \"objective\" : {\n", " \"host-short-name\" : \"LabelObjective\",\n", " \"class-name\" : \"org.tribuo.classification.sgd.objectives.LogMulticlass\"\n", - " },\n", - " \"loggingInterval\" : \"1000\"\n", + " }\n", " },\n", - " \"trained-at\" : \"2020-09-29T18:03:58.789235-04:00\",\n", + " \"os-arch\" : \"x86_64\",\n", + " \"trained-at\" : \"2021-05-24T12:27:10.387150-04:00\",\n", + " \"os-name\" : \"Mac OS X\",\n", " \"dataset\" : {\n", " \"num-features\" : \"4\",\n", " \"num-examples\" : \"105\",\n", " \"num-outputs\" : \"3\",\n", - " \"tribuo-version\" : \"4.1.0-SNAPSHOT\",\n", + " \"tribuo-version\" : \"4.1.0\",\n", " \"datasource\" : {\n", " \"train-proportion\" : \"0.7\",\n", " \"seed\" : \"1\",\n", @@ -1015,7 +1052,7 @@ " },\n", " \"transformations\" : [ ],\n", " \"is-sequence\" : \"false\",\n", - " \"is-dense\" : \"false\",\n", + " \"is-dense\" : \"true\",\n", " \"class-name\" : \"org.tribuo.MutableDataset\"\n", " },\n", " \"class-name\" : \"org.tribuo.classification.sgd.linear.LinearSGDModel\"\n", @@ -1093,7 +1130,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -1162,7 +1199,7 @@ "mimetype": "text/x-java-source", "name": "Java", "pygments_lexer": "java", - "version": "16+14" + "version": "17-ea+22-1964" } }, "nbformat": 4, diff --git a/tutorials/regression-tribuo-v4.ipynb b/tutorials/regression-tribuo-v4.ipynb index 2a703772b..4e31561f1 100644 --- a/tutorials/regression-tribuo-v4.ipynb +++ b/tutorials/regression-tribuo-v4.ipynb @@ -23,10 +23,10 @@ "metadata": {}, "outputs": [], "source": [ - "%jars ./tribuo-json-4.1.0-SNAPSHOT-jar-with-dependencies.jar\n", - "%jars ./tribuo-regression-sgd-4.1.0-SNAPSHOT-jar-with-dependencies.jar\n", - "%jars ./tribuo-regression-xgboost-4.1.0-SNAPSHOT-jar-with-dependencies.jar\n", - "%jars ./tribuo-regression-tree-4.1.0-SNAPSHOT-jar-with-dependencies.jar" + "%jars ./tribuo-json-4.1.0-jar-with-dependencies.jar\n", + "%jars ./tribuo-regression-sgd-4.1.0-jar-with-dependencies.jar\n", + "%jars ./tribuo-regression-xgboost-4.1.0-jar-with-dependencies.jar\n", + "%jars ./tribuo-regression-tree-4.1.0-jar-with-dependencies.jar" ] }, { @@ -264,7 +264,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training Linear Regression (SGD) took (00:00:00:077)\n", + "Training Linear Regression (SGD) took (00:00:00:123)\n", "Evaluation (train):\n", " RMSE 0.979522\n", " MAE 0.741870\n", @@ -357,7 +357,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training Linear Regression (AdaGrad) took (00:00:00:047)\n", + "Training Linear Regression (AdaGrad) took (00:00:00:091)\n", "Evaluation (train):\n", " RMSE 0.735311\n", " MAE 0.575096\n", @@ -436,7 +436,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training XGBoost took (00:00:00:420)\n", + "Training XGBoost took (00:00:00:320)\n", "Evaluation (train):\n", " RMSE 0.143871\n", " MAE 0.097167\n", @@ -477,7 +477,7 @@ "mimetype": "text/x-java-source", "name": "Java", "pygments_lexer": "java", - "version": "16+14" + "version": "17-ea+22-1964" } }, "nbformat": 4, diff --git a/tutorials/tensorflow-tribuo-v4.ipynb b/tutorials/tensorflow-tribuo-v4.ipynb index e041548e9..3fdce9c7a 100644 --- a/tutorials/tensorflow-tribuo-v4.ipynb +++ b/tutorials/tensorflow-tribuo-v4.ipynb @@ -41,7 +41,7 @@ "metadata": {}, "outputs": [], "source": [ - "%jars ./tribuo-tensorflow-4.1.0-SNAPSHOT-jar-with-dependencies.jar" + "%jars ./tribuo-tensorflow-4.1.0-jar-with-dependencies.jar" ] }, { @@ -280,7 +280,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Wine quality training took (00:00:01:430)\n" + "Wine quality training took (00:00:01:421)\n" ] } ], @@ -308,9 +308,9 @@ "output_type": "stream", "text": [ "Wine quality evaluation:\n", - " RMSE 0.651382\n", - " MAE 0.509830\n", - " R^2 0.347543\n", + " RMSE 0.650110\n", + " MAE 0.507774\n", + " R^2 0.350089\n", "\n" ] } @@ -401,7 +401,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "MNIST MLP training took (00:00:47:630)\n" + "MNIST MLP training took (00:00:50:286)\n" ] } ], @@ -429,32 +429,32 @@ "output_type": "stream", "text": [ "Class n tp fn fp recall prec f1\n", - "0 980 952 28 37 0.971 0.963 0.967\n", - "1 1,135 1,108 27 12 0.976 0.989 0.983\n", - "2 1,032 978 54 55 0.948 0.947 0.947\n", - "3 1,010 933 77 82 0.924 0.919 0.921\n", - "4 982 945 37 58 0.962 0.942 0.952\n", - "5 892 837 55 119 0.938 0.876 0.906\n", - "6 958 897 61 15 0.936 0.984 0.959\n", - "7 1,028 953 75 13 0.927 0.987 0.956\n", - "8 974 897 77 69 0.921 0.929 0.925\n", - "9 1,009 934 75 106 0.926 0.898 0.912\n", - "Total 10,000 9,434 566 566\n", - "Accuracy 0.943\n", - "Micro Average 0.943 0.943 0.943\n", - "Macro Average 0.943 0.943 0.943\n", - "Balanced Error Rate 0.057\n", + "0 980 952 28 42 0.971 0.958 0.965\n", + "1 1,135 1,112 23 14 0.980 0.988 0.984\n", + "2 1,032 956 76 72 0.926 0.930 0.928\n", + "3 1,010 944 66 97 0.935 0.907 0.921\n", + "4 982 916 66 50 0.933 0.948 0.940\n", + "5 892 793 99 34 0.889 0.959 0.923\n", + "6 958 924 34 39 0.965 0.960 0.962\n", + "7 1,028 973 55 31 0.946 0.969 0.958\n", + "8 974 910 64 126 0.934 0.878 0.905\n", + "9 1,009 931 78 84 0.923 0.917 0.920\n", + "Total 10,000 9,411 589 589\n", + "Accuracy 0.941\n", + "Micro Average 0.941 0.941 0.941\n", + "Macro Average 0.940 0.941 0.941\n", + "Balanced Error Rate 0.060\n", " 0 1 2 3 4 5 6 7 8 9\n", - "0 952 0 0 0 3 5 8 1 4 7\n", - "1 1 1,108 6 0 0 1 1 1 15 2\n", - "2 4 1 978 21 6 6 0 2 8 6\n", - "3 0 0 12 933 0 46 0 1 15 3\n", - "4 1 0 0 2 945 1 3 0 0 30\n", - "5 8 0 0 20 5 837 3 3 13 3\n", - "6 12 2 1 3 5 32 897 0 3 3\n", - "7 1 5 27 1 2 0 0 953 1 38\n", - "8 3 1 6 24 4 22 0 3 897 14\n", - "9 7 3 3 11 33 6 0 2 10 934\n", + "0 952 0 7 2 2 2 8 2 5 0\n", + "1 0 1,112 2 2 0 1 4 1 9 4\n", + "2 9 0 956 32 4 3 4 6 16 2\n", + "3 0 0 23 944 1 6 0 5 25 6\n", + "4 3 1 7 2 916 0 10 1 6 36\n", + "5 7 2 10 21 3 793 12 1 41 2\n", + "6 8 2 4 0 6 8 924 0 6 0\n", + "7 2 2 13 6 2 4 0 973 2 24\n", + "8 5 0 3 18 13 8 1 6 910 10\n", + "9 8 7 3 14 19 2 0 9 16 931\n", "\n" ] } @@ -504,7 +504,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "MNIST CNN training took (00:02:39:269)\n" + "MNIST CNN training took (00:02:40:480)\n" ] } ], @@ -546,32 +546,32 @@ "output_type": "stream", "text": [ "Class n tp fn fp recall prec f1\n", - "0 980 974 6 18 0.994 0.982 0.988\n", - "1 1,135 1,130 5 8 0.996 0.993 0.994\n", - "2 1,032 1,012 20 27 0.981 0.974 0.977\n", - "3 1,010 989 21 26 0.979 0.974 0.977\n", - "4 982 968 14 17 0.986 0.983 0.984\n", - "5 892 867 25 29 0.972 0.968 0.970\n", - "6 958 925 33 9 0.966 0.990 0.978\n", - "7 1,028 1,008 20 25 0.981 0.976 0.978\n", - "8 974 941 33 16 0.966 0.983 0.975\n", - "9 1,009 988 21 23 0.979 0.977 0.978\n", - "Total 10,000 9,802 198 198\n", - "Accuracy 0.980\n", - "Micro Average 0.980 0.980 0.980\n", - "Macro Average 0.980 0.980 0.980\n", - "Balanced Error Rate 0.020\n", + "0 980 968 12 25 0.988 0.975 0.981\n", + "1 1,135 1,123 12 9 0.989 0.992 0.991\n", + "2 1,032 1,020 12 55 0.988 0.949 0.968\n", + "3 1,010 980 30 24 0.970 0.976 0.973\n", + "4 982 963 19 27 0.981 0.973 0.977\n", + "5 892 873 19 46 0.979 0.950 0.964\n", + "6 958 922 36 8 0.962 0.991 0.977\n", + "7 1,028 960 68 13 0.934 0.987 0.960\n", + "8 974 937 37 22 0.962 0.977 0.969\n", + "9 1,009 978 31 47 0.969 0.954 0.962\n", + "Total 10,000 9,724 276 276\n", + "Accuracy 0.972\n", + "Micro Average 0.972 0.972 0.972\n", + "Macro Average 0.972 0.972 0.972\n", + "Balanced Error Rate 0.028\n", " 0 1 2 3 4 5 6 7 8 9\n", - "0 974 1 0 0 0 0 2 2 1 0\n", - "1 0 1,130 2 0 1 1 1 0 0 0\n", - "2 1 1 1,012 8 1 0 2 5 2 0\n", - "3 0 0 6 989 0 10 0 4 1 0\n", - "4 1 0 1 0 968 0 3 0 2 7\n", - "5 1 1 0 11 0 867 1 0 6 5\n", - "6 12 2 3 0 2 12 925 0 2 0\n", - "7 0 1 10 2 0 0 0 1,008 1 6\n", - "8 2 1 4 4 5 6 0 6 941 5\n", - "9 1 1 1 1 8 0 0 8 1 988\n", + "0 968 0 3 0 0 1 3 1 3 1\n", + "1 0 1,123 3 0 2 1 2 0 4 0\n", + "2 1 1 1,020 3 3 0 1 1 1 1\n", + "3 1 0 6 980 0 15 0 3 4 1\n", + "4 0 0 3 1 963 0 1 0 1 13\n", + "5 1 0 0 9 0 873 1 1 2 5\n", + "6 13 3 2 1 2 11 922 0 3 1\n", + "7 0 1 28 8 7 1 0 960 2 21\n", + "8 8 0 9 1 2 11 0 2 937 4\n", + "9 1 4 1 1 11 6 0 5 2 978\n", "\n" ] } @@ -587,7 +587,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "As we might expect, exploiting the structured nature of images lets us get better performance, with 98% accuracy after only 3 epochs. There is a wide variety of different CNN architectures, each suited for different kinds of tasks. Some are even applied to sequential data like text.\n", + "As we might expect, exploiting the structured nature of images lets us get better performance, with 97% accuracy after only 3 epochs. There is a wide variety of different CNN architectures, each suited for different kinds of tasks. Some are even applied to sequential data like text.\n", "\n", "## Exporting and Importing TensorFlow models\n", "\n", @@ -727,7 +727,7 @@ "mimetype": "text/x-java-source", "name": "Java", "pygments_lexer": "java", - "version": "16+36-2231" + "version": "17-ea+22-1964" } }, "nbformat": 4,