diff --git a/Microsoft.ML.sln b/Microsoft.ML.sln index b5e7e1b5e3..c7a9a4110d 100644 --- a/Microsoft.ML.sln +++ b/Microsoft.ML.sln @@ -23,8 +23,6 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.StandardLearne EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Transforms", "src\Microsoft.ML.Transforms\Microsoft.ML.Transforms.csproj", "{2911A286-ECA4-4730-97A9-DA1FEE2DED97}" EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Legacy", "src\Microsoft.ML.Legacy\Microsoft.ML.Legacy.csproj", "{7288C084-11C0-43BE-AC7F-45DCFEAEEBF6}" -EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.KMeansClustering", "src\Microsoft.ML.KMeansClustering\Microsoft.ML.KMeansClustering.csproj", "{F1CAE3AB-4F86-4BC0-BBA8-C4A58E7E8A4A}" EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.PCA", "src\Microsoft.ML.PCA\Microsoft.ML.PCA.csproj", "{58E06735-1129-4DD5-86E0-6BBFF049AAD9}" @@ -147,7 +145,7 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.HalLearners.St EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.OnnxTransform.StaticPipe", "src\Microsoft.ML.OnnxTransform.StaticPipe\Microsoft.ML.OnnxTransform.StaticPipe.csproj", "{D1324668-9568-40F4-AA55-30A9A516C230}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Microsoft.ML.LightGBM.StaticPipe", "src\Microsoft.ML.LightGBM.StaticPipe\Microsoft.ML.LightGBM.StaticPipe.csproj", "{22C51B08-ACAE-47B2-A312-462DC239A23B}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.LightGBM.StaticPipe", "src\Microsoft.ML.LightGBM.StaticPipe\Microsoft.ML.LightGBM.StaticPipe.csproj", "{22C51B08-ACAE-47B2-A312-462DC239A23B}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -213,14 +211,6 @@ Global {2911A286-ECA4-4730-97A9-DA1FEE2DED97}.Release|Any CPU.Build.0 = Release|Any CPU {2911A286-ECA4-4730-97A9-DA1FEE2DED97}.Release-Intrinsics|Any CPU.ActiveCfg = Release-Intrinsics|Any CPU {2911A286-ECA4-4730-97A9-DA1FEE2DED97}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU - {7288C084-11C0-43BE-AC7F-45DCFEAEEBF6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {7288C084-11C0-43BE-AC7F-45DCFEAEEBF6}.Debug|Any CPU.Build.0 = Debug|Any CPU - {7288C084-11C0-43BE-AC7F-45DCFEAEEBF6}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug-Intrinsics|Any CPU - {7288C084-11C0-43BE-AC7F-45DCFEAEEBF6}.Debug-Intrinsics|Any CPU.Build.0 = Debug-Intrinsics|Any CPU - {7288C084-11C0-43BE-AC7F-45DCFEAEEBF6}.Release|Any CPU.ActiveCfg = Release|Any CPU - {7288C084-11C0-43BE-AC7F-45DCFEAEEBF6}.Release|Any CPU.Build.0 = Release|Any CPU - {7288C084-11C0-43BE-AC7F-45DCFEAEEBF6}.Release-Intrinsics|Any CPU.ActiveCfg = Release-Intrinsics|Any CPU - {7288C084-11C0-43BE-AC7F-45DCFEAEEBF6}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU {F1CAE3AB-4F86-4BC0-BBA8-C4A58E7E8A4A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {F1CAE3AB-4F86-4BC0-BBA8-C4A58E7E8A4A}.Debug|Any CPU.Build.0 = Debug|Any CPU {F1CAE3AB-4F86-4BC0-BBA8-C4A58E7E8A4A}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug-Intrinsics|Any CPU @@ -593,7 +583,6 @@ Global {65D0603E-B96C-4DFC-BDD1-705891B88C18} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {707BB22C-7E5F-497A-8C2F-74578F675705} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {2911A286-ECA4-4730-97A9-DA1FEE2DED97} = {09EADF06-BE25-4228-AB53-95AE3E15B530} - {7288C084-11C0-43BE-AC7F-45DCFEAEEBF6} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {F1CAE3AB-4F86-4BC0-BBA8-C4A58E7E8A4A} = 
{09EADF06-BE25-4228-AB53-95AE3E15B530}
{58E06735-1129-4DD5-86E0-6BBFF049AAD9} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
{64BC22D3-1E76-41EF-94D8-C79E471FF2DD} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
diff --git a/build/Dependencies.props b/build/Dependencies.props
index 8d2f7abdc1..d10339d749 100644
--- a/build/Dependencies.props
+++ b/build/Dependencies.props
@@ -15,7 +15,7 @@
 3.5.1
 2.2.1.1
- 0.1.5
+ 0.1.5
 0.0.0.7
 2.1.3
 4.5.0
diff --git a/docs/release-notes/0.9/release-0.9.md b/docs/release-notes/0.9/release-0.9.md
new file mode 100644
index 0000000000..ddb76f1202
--- /dev/null
+++ b/docs/release-notes/0.9/release-0.9.md
@@ -0,0 +1,51 @@
+# ML.NET 0.9 Release Notes
+
+Welcome to 2019! For the past 9 months we have been adding features and improving [ML.NET](https://aka.ms/mlnet). In the forthcoming 0.10, 0.11, and 0.12 releases before we reach 1.0, we will focus on the overall stability of the package, continue to refine the API, increase test coverage, and improve documentation. The 0.9 release packs multiple fixes as well as a significant cleanup of the package's internal code.
+
+### Installation
+
+ML.NET supports Windows, macOS, and Linux. See [supported OS versions of .NET
+Core
+2.0](https://github.com/dotnet/core/blob/master/release-notes/2.0/2.0-supported-os.md)
+for more details.
+
+You can install the ML.NET NuGet package from the CLI using:
+```
+dotnet add package Microsoft.ML
+```
+
+From the package manager:
+```
+Install-Package Microsoft.ML
+```
+
+### Release Notes
+
+Below are a few of the highlights from this release. There are many other improvements in the API.
+
+* Added Feature Contribution Calculation
+  ([#1847](https://github.com/dotnet/machinelearning/pull/1847))
+
+  * FCC can be used to compute feature contributions in addition to the overall prediction when models are evaluated.
+
+* Removed the Legacy namespace that was marked obsolete
+  ([#2043](https://github.com/dotnet/machinelearning/pull/2043))
+
+* GPU support for ONNX Transform
+  ([#1922](https://github.com/dotnet/machinelearning/pull/1922))
+
+  * GPU is currently supported on 64-bit Windows
+  * Cross-platform support is still being developed for this feature
+
+* `Permutation Feature Importance` now supports confidence intervals
+  ([#1844](https://github.com/dotnet/machinelearning/pull/1844))
+
+* Introducing `PredictionEngine` instead of `PredictionFunction`
+  ([#1920](https://github.com/dotnet/machinelearning/pull/1920))
+
+### Acknowledgements
+
+Shoutout to [dhilmathy](https://github.com/dhilmathy),
+[mnboos](https://github.com/mnboos),
+[robosek](https://github.com/robosek), and the [ML.NET](https://aka.ms/mlnet) team for their
+contributions as part of this release!
\ No newline at end of file
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureContributionCalculationTransform.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureContributionCalculationTransform.cs
index 96ef491bb7..3af22fba9b 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureContributionCalculationTransform.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureContributionCalculationTransform.cs
@@ -8,7 +8,7 @@ public class FeatureContributionCalculationTransform_RegressionExample
         public static void FeatureContributionCalculationTransform_Regression()
         {
             // Downloading the dataset from github.com/dotnet/machinelearning.
-            // This will create a sentiment.tsv file in the filesystem.
+            // This will create a housing.txt file in the filesystem.
             // You can open this file, if you want to see the data.
string dataFile = SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset(); @@ -59,12 +59,12 @@ public static void FeatureContributionCalculationTransform_Regression() // Create a Feature Contribution Calculator // Calculate the feature contributions for all features given trained model parameters // And don't normalize the contribution scores - var featureContributionCalculator = mlContext.Model.Explainability.FeatureContributionCalculation(model.Model, model.FeatureColumn, top: 11, normalize: false); + var featureContributionCalculator = mlContext.Model.Explainability.FeatureContributionCalculation(model.Model, model.FeatureColumn, numPositiveContributions: 11, normalize: false); var outputData = featureContributionCalculator.Fit(scoredData).Transform(scoredData); // FeatureContributionCalculatingEstimator can be use as an intermediary step in a pipeline. // The features retained by FeatureContributionCalculatingEstimator will be in the FeatureContribution column. - var pipeline = mlContext.Model.Explainability.FeatureContributionCalculation(model.Model, model.FeatureColumn, top: 11) + var pipeline = mlContext.Model.Explainability.FeatureContributionCalculation(model.Model, model.FeatureColumn, numPositiveContributions: 11) .Append(mlContext.Regression.Trainers.OrdinaryLeastSquares(featureColumn: "FeatureContributions")); var outData = featureContributionCalculator.Fit(scoredData).Transform(scoredData); @@ -72,7 +72,7 @@ public static void FeatureContributionCalculationTransform_Regression() var weights = new VBuffer(); model.Model.GetFeatureWeights(ref weights); - // Let's now walk through the first ten reconds and see which feature drove the values the most + // Let's now walk through the first ten records and see which feature drove the values the most // Get prediction scores and contributions var scoringEnumerator = outputData.AsEnumerable(mlContext, true).GetEnumerator(); int index = 0; diff --git a/docs/samples/Microsoft.ML.Samples/Static/AveragedPerceptronBinaryClassification.cs b/docs/samples/Microsoft.ML.Samples/Static/AveragedPerceptronBinaryClassification.cs index 176f55eff6..3dba26d701 100644 --- a/docs/samples/Microsoft.ML.Samples/Static/AveragedPerceptronBinaryClassification.cs +++ b/docs/samples/Microsoft.ML.Samples/Static/AveragedPerceptronBinaryClassification.cs @@ -33,7 +33,7 @@ public static void AveragedPerceptronBinaryClassification() var mlContext = new MLContext(); // Creating Data Reader with the initial schema based on the format of the data - var reader = TextLoader.CreateReader( + var reader = TextLoaderStatic.CreateReader( mlContext, c => ( Age: c.LoadFloat(0), diff --git a/docs/samples/Microsoft.ML.Samples/Static/FastTreeBinaryClassification.cs b/docs/samples/Microsoft.ML.Samples/Static/FastTreeBinaryClassification.cs index 55835aa0b3..87ed0e2309 100644 --- a/docs/samples/Microsoft.ML.Samples/Static/FastTreeBinaryClassification.cs +++ b/docs/samples/Microsoft.ML.Samples/Static/FastTreeBinaryClassification.cs @@ -33,7 +33,7 @@ public static void FastTreeBinaryClassification() var mlContext = new MLContext(); // Creating Data Reader with the initial schema based on the format of the data - var reader = TextLoader.CreateReader( + var reader = TextLoaderStatic.CreateReader( mlContext, c => ( Age: c.LoadFloat(0), diff --git a/docs/samples/Microsoft.ML.Samples/Static/FastTreeRegression.cs b/docs/samples/Microsoft.ML.Samples/Static/FastTreeRegression.cs index 10e3edb7ea..60ee9cded5 100644 --- a/docs/samples/Microsoft.ML.Samples/Static/FastTreeRegression.cs +++ 
b/docs/samples/Microsoft.ML.Samples/Static/FastTreeRegression.cs @@ -20,7 +20,7 @@ public static void FastTreeRegression() var mlContext = new MLContext(); // Creating a data reader, based on the format of the data - var reader = TextLoader.CreateReader(mlContext, c => ( + var reader = TextLoaderStatic.CreateReader(mlContext, c => ( label: c.LoadFloat(0), features: c.LoadFloat(1, 6) ), diff --git a/docs/samples/Microsoft.ML.Samples/Static/FeatureSelectionTransform.cs b/docs/samples/Microsoft.ML.Samples/Static/FeatureSelectionTransform.cs index 6c130dfc4b..e91e4cf42d 100644 --- a/docs/samples/Microsoft.ML.Samples/Static/FeatureSelectionTransform.cs +++ b/docs/samples/Microsoft.ML.Samples/Static/FeatureSelectionTransform.cs @@ -31,7 +31,7 @@ public static void FeatureSelectionTransform() // First, we define the reader: specify the data columns and where to find them in the text file. Notice that we combine entries from // all the feature columns into entries of a vector of a single column named "Features". - var reader = TextLoader.CreateReader(ml, c => ( + var reader = TextLoaderStatic.CreateReader(ml, c => ( Label: c.LoadBool(0), Features: c.LoadFloat(1, 9) ), diff --git a/docs/samples/Microsoft.ML.Samples/Static/LightGBMBinaryClassification.cs b/docs/samples/Microsoft.ML.Samples/Static/LightGBMBinaryClassification.cs index bee5be6c21..5247355663 100644 --- a/docs/samples/Microsoft.ML.Samples/Static/LightGBMBinaryClassification.cs +++ b/docs/samples/Microsoft.ML.Samples/Static/LightGBMBinaryClassification.cs @@ -34,7 +34,7 @@ public static void LightGbmBinaryClassification() var mlContext = new MLContext(); // Creating Data Reader with the initial schema based on the format of the data - var reader = TextLoader.CreateReader( + var reader = TextLoaderStatic.CreateReader( mlContext, c => ( Age: c.LoadFloat(0), diff --git a/docs/samples/Microsoft.ML.Samples/Static/LightGBMRegression.cs b/docs/samples/Microsoft.ML.Samples/Static/LightGBMRegression.cs index 45bf277157..fffad9181b 100644 --- a/docs/samples/Microsoft.ML.Samples/Static/LightGBMRegression.cs +++ b/docs/samples/Microsoft.ML.Samples/Static/LightGBMRegression.cs @@ -2,6 +2,7 @@ using Microsoft.ML.Data; using Microsoft.ML.LightGBM; using Microsoft.ML.LightGBM.StaticPipe; +using Microsoft.ML.StaticPipe; namespace Microsoft.ML.Samples.Static { @@ -19,7 +20,7 @@ public static void LightGbmRegression() var mlContext = new MLContext(); // Creating a data reader, based on the format of the data - var reader = TextLoader.CreateReader(mlContext, c => ( + var reader = TextLoaderStatic.CreateReader(mlContext, c => ( label: c.LoadFloat(0), features: c.LoadFloat(1, 6) ), diff --git a/docs/samples/Microsoft.ML.Samples/Static/SDCABinaryClassification.cs b/docs/samples/Microsoft.ML.Samples/Static/SDCABinaryClassification.cs index 122057a96e..7ca69fd27d 100644 --- a/docs/samples/Microsoft.ML.Samples/Static/SDCABinaryClassification.cs +++ b/docs/samples/Microsoft.ML.Samples/Static/SDCABinaryClassification.cs @@ -33,7 +33,7 @@ public static void SdcaBinaryClassification() var mlContext = new MLContext(); // Creating Data Reader with the initial schema based on the format of the data - var reader = TextLoader.CreateReader( + var reader = TextLoaderStatic.CreateReader( mlContext, c => ( Age: c.LoadFloat(0), diff --git a/docs/samples/Microsoft.ML.Samples/Static/SDCARegression.cs b/docs/samples/Microsoft.ML.Samples/Static/SDCARegression.cs index 93c680d950..50d5b8c6aa 100644 --- a/docs/samples/Microsoft.ML.Samples/Static/SDCARegression.cs +++ 
b/docs/samples/Microsoft.ML.Samples/Static/SDCARegression.cs @@ -18,7 +18,7 @@ public static void SdcaRegression() var mlContext = new MLContext(); // Creating a data reader, based on the format of the data - var reader = TextLoader.CreateReader(mlContext, c => ( + var reader = TextLoaderStatic.CreateReader(mlContext, c => ( label: c.LoadFloat(0), features: c.LoadFloat(1, 6) ), diff --git a/pkg/Microsoft.ML.OnnxTransform/Microsoft.ML.OnnxTransform.nupkgproj b/pkg/Microsoft.ML.OnnxTransform/Microsoft.ML.OnnxTransform.nupkgproj index b817e809d1..27c03c1848 100644 --- a/pkg/Microsoft.ML.OnnxTransform/Microsoft.ML.OnnxTransform.nupkgproj +++ b/pkg/Microsoft.ML.OnnxTransform/Microsoft.ML.OnnxTransform.nupkgproj @@ -7,7 +7,7 @@ - + diff --git a/pkg/Microsoft.ML.StaticPipe/Microsoft.ML.StaticPipe.nupkgproj b/pkg/Microsoft.ML.StaticPipe/Microsoft.ML.StaticPipe.nupkgproj new file mode 100644 index 0000000000..564f4333de --- /dev/null +++ b/pkg/Microsoft.ML.StaticPipe/Microsoft.ML.StaticPipe.nupkgproj @@ -0,0 +1,14 @@ + + + + netstandard2.0 + ML.NET component for a statically typed API. + + + + + + + + + diff --git a/pkg/Microsoft.ML.StaticPipe/Microsoft.ML.StaticPipe.symbols.nupkgproj b/pkg/Microsoft.ML.StaticPipe/Microsoft.ML.StaticPipe.symbols.nupkgproj new file mode 100644 index 0000000000..a4b942a712 --- /dev/null +++ b/pkg/Microsoft.ML.StaticPipe/Microsoft.ML.StaticPipe.symbols.nupkgproj @@ -0,0 +1,5 @@ + + + + + diff --git a/src/Microsoft.ML.Core/Data/ColumnType.cs b/src/Microsoft.ML.Core/Data/ColumnType.cs index eb3d080ecd..bb75ed778c 100644 --- a/src/Microsoft.ML.Core/Data/ColumnType.cs +++ b/src/Microsoft.ML.Core/Data/ColumnType.cs @@ -21,9 +21,7 @@ public abstract class ColumnType : IEquatable // This private constructor sets all the IsXxx flags. It is invoked by other ctors. private ColumnType() { - IsPrimitive = this is PrimitiveType; IsVector = this is VectorType; - IsNumber = this is NumberType; IsKey = this is KeyType; } @@ -73,58 +71,6 @@ private protected ColumnType(Type rawType, DataKind rawKind) [BestFriend] internal DataKind RawKind { get; } - /// - /// Whether this is a primitive type. External code should use is . - /// - [BestFriend] - internal bool IsPrimitive { get; } - - /// - /// Whether this type is a standard numeric type. External code should use is . - /// - [BestFriend] - internal bool IsNumber { get; } - - /// - /// Whether this type is the standard text type. External code should use is . - /// - [BestFriend] - internal bool IsText - { - get - { - if (!(this is TextType)) - return false; - // TextType is a singleton. - Contracts.Assert(this == TextType.Instance); - return true; - } - } - - /// - /// Whether this type is the standard boolean type. External code should use is . - /// - [BestFriend] - internal bool IsBool - { - get - { - if (!(this is BoolType)) - return false; - // BoolType is a singleton. - Contracts.Assert(this == BoolType.Instance); - return true; - } - } - - /// - /// Whether this type is a standard scalar type completely determined by its - /// (not a or , etc). - /// - [BestFriend] - internal bool IsStandardScalar => IsNumber || IsText || IsBool || - (this is TimeSpanType) || (this is DateTimeType) || (this is DateTimeOffsetType); - /// /// Whether this type is a key type, which implies that the order of values is not significant, /// and arithmetic is non-sensical. A key type can define a cardinality. 
@@ -230,13 +176,11 @@ public abstract class StructuredType : ColumnType protected StructuredType(Type rawType) : base(rawType) { - Contracts.Assert(!IsPrimitive); } private protected StructuredType(Type rawType, DataKind rawKind) : base(rawType, rawKind) { - Contracts.Assert(!IsPrimitive); } } @@ -249,7 +193,6 @@ public abstract class PrimitiveType : ColumnType protected PrimitiveType(Type rawType) : base(rawType) { - Contracts.Assert(IsPrimitive); Contracts.CheckParam(!typeof(IDisposable).IsAssignableFrom(RawType), nameof(rawType), "A " + nameof(PrimitiveType) + " cannot have a disposable " + nameof(RawType)); } @@ -257,7 +200,6 @@ protected PrimitiveType(Type rawType) private protected PrimitiveType(Type rawType, DataKind rawKind) : base(rawType, rawKind) { - Contracts.Assert(IsPrimitive); Contracts.Assert(!typeof(IDisposable).IsAssignableFrom(RawType)); } @@ -322,7 +264,6 @@ private NumberType(DataKind kind, string name) { Contracts.AssertNonEmpty(name); _name = name; - Contracts.Assert(IsNumber); } private static volatile NumberType _instI1; @@ -496,7 +437,7 @@ public override bool Equals(ColumnType other) { if (other == this) return true; - Contracts.Assert(other == null || !other.IsNumber || other.RawKind != RawKind); + Contracts.Assert(other == null || !(other is NumberType) || other.RawKind != RawKind); return false; } diff --git a/src/Microsoft.ML.Core/Data/ColumnTypeExtensions.cs b/src/Microsoft.ML.Core/Data/ColumnTypeExtensions.cs new file mode 100644 index 0000000000..2dcba77703 --- /dev/null +++ b/src/Microsoft.ML.Core/Data/ColumnTypeExtensions.cs @@ -0,0 +1,21 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace Microsoft.ML.Data +{ + /// + /// Extension methods related to the ColumnType class. + /// + [BestFriend] + internal static class ColumnTypeExtensions + { + /// + /// Whether this type is a standard scalar type completely determined by its + /// (not a or , etc). 
+ /// + public static bool IsStandardScalar(this ColumnType columnType) => + (columnType is NumberType) || (columnType is TextType) || (columnType is BoolType) || + (columnType is TimeSpanType) || (columnType is DateTimeType) || (columnType is DateTimeOffsetType); + } +} diff --git a/src/Microsoft.ML.Core/Data/MetadataUtils.cs b/src/Microsoft.ML.Core/Data/MetadataUtils.cs index efa22d134a..b4804c5337 100644 --- a/src/Microsoft.ML.Core/Data/MetadataUtils.cs +++ b/src/Microsoft.ML.Core/Data/MetadataUtils.cs @@ -283,7 +283,7 @@ internal static IEnumerable GetColumnSet(this Schema schema, string metadat for (int col = 0; col < schema.Count; col++) { var columnType = schema[col].Metadata.Schema.GetColumnOrNull(metadataKind)?.Type; - if (columnType != null && columnType.IsText) + if (columnType != null && columnType is TextType) { ReadOnlyMemory val = default; schema[col].Metadata.GetValue(metadataKind, ref val); @@ -318,7 +318,7 @@ internal static bool HasSlotNames(this Schema.Column column, int vectorSize) metaColumn != null && metaColumn.Value.Type.IsVector && metaColumn.Value.Type.VectorSize == vectorSize - && metaColumn.Value.Type.ItemType.IsText; + && metaColumn.Value.Type.ItemType is TextType; } public static void GetSlotNames(this Schema.Column column, ref VBuffer> slotNames) @@ -348,7 +348,7 @@ internal static bool HasKeyValues(this Schema.Column column, int keyCount) metaColumn != null && metaColumn.Value.Type.IsVector && metaColumn.Value.Type.VectorSize == keyCount - && metaColumn.Value.Type.ItemType.IsText; + && metaColumn.Value.Type.ItemType is TextType; } [BestFriend] @@ -356,7 +356,7 @@ internal static bool HasKeyValues(this SchemaShape.Column col) { return col.Metadata.TryFindColumn(Kinds.KeyValues, out var metaCol) && metaCol.Kind == SchemaShape.Column.VectorKind.Vector - && metaCol.ItemType.IsText; + && metaCol.ItemType is TextType; } /// @@ -365,7 +365,7 @@ internal static bool HasKeyValues(this SchemaShape.Column col) public static bool IsNormalized(this Schema.Column column) { var metaColumn = column.Metadata.Schema.GetColumnOrNull((Kinds.IsNormalized)); - if (metaColumn == null || !metaColumn.Value.Type.IsBool) + if (metaColumn == null || !(metaColumn.Value.Type is BoolType)) return false; bool value = default; diff --git a/src/Microsoft.ML.Core/Properties/AssemblyInfo.cs b/src/Microsoft.ML.Core/Properties/AssemblyInfo.cs index 58bda21829..2cc96116b6 100644 --- a/src/Microsoft.ML.Core/Properties/AssemblyInfo.cs +++ b/src/Microsoft.ML.Core/Properties/AssemblyInfo.cs @@ -15,7 +15,6 @@ [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.OnnxTransformTest" + PublicKey.TestValue)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.EntryPoints" + PublicKey.Value)] -[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Legacy" + PublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Maml" + PublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.ResultProcessor" + PublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.CpuMath" + PublicKey.Value)] @@ -47,4 +46,55 @@ [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.OnnxTransform.StaticPipe" + PublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.LightGBM.StaticPipe" + PublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Internal.MetaLinearLearner" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "TreeVisualizer" + InternalPublicKey.Value)] +[assembly: 
InternalsVisibleTo(assemblyName: "TMSNlearnPrediction" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.CntkWrapper" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.DssmFeaturizer" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.DssmTrigram" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.EdgeML" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "ExperimentVisualization" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Internal.FastTree" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.Garage" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.HelperCommands" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.Internal.ImageAnalytics" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "ImageNetClientSample" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.InferNetWrapper" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.TlcTesting.Inference" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Internal.RecipeInference" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.LDSVM" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "LibSvmWrapper" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "maml" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "TLCTelemetry, PublicKey=0024000004800000940000000602000000240000525341310004000001000100edfd1aca74f2e8d6d7fd53d6f0bf77982ea38a5d2dc462f2f9f01f0734d87a8480279e02c42df82024ea146bf9721a86c50740b9809e4f379a848e1c5168bbfc2b3969aacec1822b3a29db0a23dce5502d764bc0d6ca68f5e4a53ad5212c427567545ec7027402b908460476bcc690a730d25532399e9ce5de32bfdc2211f7e0")] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Internal.Api" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "RunTests" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.PyTrainer" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "ParameterMixer" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.OcrTransform" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.RServerScoring.TextAnalytics" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.TextAnalytics" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "StratoLearner" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.SequencePrediction" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.Internal.Opencv" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.Sar" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.NeuralNetworks" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.PowerShellIntegration" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: 
"Microsoft.ML.RServerScoring.NeuralNetworks" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "TMSNStreams" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.TlcAzurePublish" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.VowpalWabbit" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.XGBoost" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.SLib" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "RunTestsMore" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.RServerScoring" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.Sweeper" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.TlcCustomModule" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.SweeperHpc" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.Scope" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "NeuralNetworksTest" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "RunEndToEnd" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "RunTestsAzurePublish" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "SseTests" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "TLC" + InternalPublicKey.Value)] + [assembly: WantsToBeBestFriends] diff --git a/src/Microsoft.ML.Core/PublicKey.cs b/src/Microsoft.ML.Core/PublicKey.cs index 63718c3f8e..639817b83f 100644 --- a/src/Microsoft.ML.Core/PublicKey.cs +++ b/src/Microsoft.ML.Core/PublicKey.cs @@ -17,4 +17,10 @@ internal static class PublicKey public const string Value = ", PublicKey=00240000048000009400000006020000002400005253413100040000010001004b86c4cb78549b34bab61a3b1800e23bfeb5b3ec390074041536a7e3cbd97f5f04cf0f857155a8928eaa29ebfd11cfbbad3ba70efea7bda3226c6a8d370a4cd303f714486b6ebc225985a638471e6ef571cc92a4613c00b8fa65d61ccee0cbe5f36330c9a01f4183559f1bef24cc2917c6d913e3a541333a1d05d9bed22b38cb"; public const string TestValue = ", PublicKey=002400000480000094000000060200000024000052534131000400000100010015c01ae1f50e8cc09ba9eac9147cf8fd9fce2cfe9f8dce4f7301c4132ca9fb50ce8cbf1df4dc18dd4d210e4345c744ecb3365ed327efdbc52603faa5e21daa11234c8c4a73e51f03bf192544581ebe107adee3a34928e39d04e524a9ce729d5090bfd7dad9d10c722c0def9ccc08ff0a03790e48bcd1f9b6c476063e1966a1c4"; } + + [BestFriend] + internal static class InternalPublicKey + { + public const string Value = ", PublicKey=0024000004800000940000000602000000240000525341310004000001000100bd8dded65b44bf8183068bd6dae3b68ba499202b2909640604cf63c7c0ea95bec94a400af533d1132e0dba214f310f666486b50ea91f2697a4fe331eb6a8d7306029344e320dabb7c4c3617472e3088c28dbfcf761a3f1b954a2a64cb865aae873b1d3c3cab344661cd7d5929d1043912908b8dd321889ca11f29d6bf9b9b9a9"; + } } diff --git a/src/Microsoft.ML.CpuMath/Properties/AssemblyInfo.cs b/src/Microsoft.ML.CpuMath/Properties/AssemblyInfo.cs index ab9968b399..1d0ef92696 100644 --- a/src/Microsoft.ML.CpuMath/Properties/AssemblyInfo.cs +++ b/src/Microsoft.ML.CpuMath/Properties/AssemblyInfo.cs @@ -3,5 +3,13 @@ // See the LICENSE file in the project root for more information. 
using System.Runtime.CompilerServices; +using Microsoft.ML.Internal.CpuMath.Core; -[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.CpuMath.PerformanceTests, PublicKey=002400000480000094000000060200000024000052534131000400000100010015c01ae1f50e8cc09ba9eac9147cf8fd9fce2cfe9f8dce4f7301c4132ca9fb50ce8cbf1df4dc18dd4d210e4345c744ecb3365ed327efdbc52603faa5e21daa11234c8c4a73e51f03bf192544581ebe107adee3a34928e39d04e524a9ce729d5090bfd7dad9d10c722c0def9ccc08ff0a03790e48bcd1f9b6c476063e1966a1c4")] \ No newline at end of file +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.CpuMath.PerformanceTests" + PublicKey.TestValue)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Internal.CpuMath" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.Internal.MklMath" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "LibSvmWrapper" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.NeuralNetworks" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.RServerScoring.NeuralNetworks" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "RunTests" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "SseTests" + InternalPublicKey.Value)] diff --git a/src/Microsoft.ML.Data/Commands/DataCommand.cs b/src/Microsoft.ML.Data/Commands/DataCommand.cs index d74cd2cd03..119a9a4e5d 100644 --- a/src/Microsoft.ML.Data/Commands/DataCommand.cs +++ b/src/Microsoft.ML.Data/Commands/DataCommand.cs @@ -167,7 +167,7 @@ protected void SendTelemetryMetric(Dictionary[] metricValues) { var nameOfMetric = "TLC_" + cursor.Schema[currentIndex].Name; var type = cursor.Schema[currentIndex].Type; - if (type.IsNumber) + if (type is NumberType) { var getter = RowCursorUtils.GetGetterAs(NumberType.R8, cursor, currentIndex); double metricValue = 0; diff --git a/src/Microsoft.ML.Data/Commands/ShowSchemaCommand.cs b/src/Microsoft.ML.Data/Commands/ShowSchemaCommand.cs index 80c3249aaa..04b63afa5a 100644 --- a/src/Microsoft.ML.Data/Commands/ShowSchemaCommand.cs +++ b/src/Microsoft.ML.Data/Commands/ShowSchemaCommand.cs @@ -154,7 +154,7 @@ private static void PrintSchema(TextWriter writer, Arguments args, Schema schema ColumnType typeNames; if ((typeNames = schema[col].Metadata.Schema.GetColumnOrNull(MetadataUtils.Kinds.SlotNames)?.Type) == null) continue; - if (typeNames.VectorSize != type.VectorSize || !typeNames.ItemType.IsText) + if (typeNames.VectorSize != type.VectorSize || !(typeNames.ItemType is TextType)) { Contracts.Assert(false, "Unexpected slot names type"); continue; @@ -212,7 +212,7 @@ private static void ShowMetadataValue(IndentedTextWriter itw, Schema schema, int Contracts.AssertValue(type); Contracts.Assert(!type.IsVector); - if (!type.IsStandardScalar && !type.IsKey) + if (!type.IsStandardScalar() && !type.IsKey) { itw.Write(": Can't display value of this type"); return; @@ -252,7 +252,7 @@ private static void ShowMetadataValueVec(IndentedTextWriter itw, Schema schema, Contracts.AssertValue(type); Contracts.Assert(type.IsVector); - if (!type.ItemType.IsStandardScalar && !type.ItemType.IsKey) + if (!type.ItemType.IsStandardScalar() && !type.ItemType.IsKey) { itw.Write(": Can't display value of this type"); return; diff --git a/src/Microsoft.ML.Data/Commands/TypeInfoCommand.cs b/src/Microsoft.ML.Data/Commands/TypeInfoCommand.cs index e7f0bb5d15..1bbbb4ec8a 100644 --- a/src/Microsoft.ML.Data/Commands/TypeInfoCommand.cs +++ 
b/src/Microsoft.ML.Data/Commands/TypeInfoCommand.cs @@ -137,7 +137,7 @@ private TypeNaInfo KindReport(IChannel ch, PrimitiveType type) { Contracts.AssertValue(ch); ch.AssertValue(type); - ch.Assert(type.IsStandardScalar); + ch.Assert(type.IsStandardScalar()); var conv = Conversions.Instance; InPredicate isNaDel; diff --git a/src/Microsoft.ML.Data/Data/Conversion.cs b/src/Microsoft.ML.Data/Data/Conversion.cs index 3b1a9a9628..e6bd926380 100644 --- a/src/Microsoft.ML.Data/Data/Conversion.cs +++ b/src/Microsoft.ML.Data/Data/Conversion.cs @@ -452,12 +452,12 @@ public bool TryGetStandardConversion(ColumnType typeSrc, ColumnType typeDst, } else if (typeDst is KeyType keyDst) { - if (!typeSrc.IsText) + if (!(typeSrc is TextType)) return false; conv = GetKeyParse(keyDst); return true; } - else if (!typeDst.IsStandardScalar) + else if (!typeDst.IsStandardScalar()) return false; Contracts.Assert(typeSrc.RawKind != 0); @@ -567,7 +567,7 @@ public ValueMapper GetKeyStringConversion(KeyType key) public TryParseMapper GetTryParseConversion(ColumnType typeDst) { Contracts.CheckValue(typeDst, nameof(typeDst)); - Contracts.CheckParam(typeDst.IsStandardScalar || typeDst.IsKey, nameof(typeDst), + Contracts.CheckParam(typeDst.IsStandardScalar() || typeDst.IsKey, nameof(typeDst), "Parse conversion only supported for standard types"); Contracts.Check(typeDst.RawType == typeof(TDst), "Wrong TDst type parameter"); @@ -676,7 +676,7 @@ public InPredicate GetIsDefaultPredicate(ColumnType type) var t = type; Delegate del; - if (!t.IsStandardScalar && !t.IsKey || !_isDefaultDelegates.TryGetValue(t.RawKind, out del)) + if (!t.IsStandardScalar() && !t.IsKey || !_isDefaultDelegates.TryGetValue(t.RawKind, out del)) throw Contracts.Except("No IsDefault predicate for '{0}'", type); return (InPredicate)del; @@ -719,7 +719,7 @@ public bool TryGetIsNAPredicate(ColumnType type, out Delegate del) Contracts.Assert(_isDefaultDelegates.ContainsKey(t.RawKind)); del = _isDefaultDelegates[t.RawKind]; } - else if (!t.IsStandardScalar || !_isNADelegates.TryGetValue(t.RawKind, out del)) + else if (!t.IsStandardScalar() || !_isNADelegates.TryGetValue(t.RawKind, out del)) { del = null; return false; @@ -742,7 +742,7 @@ public InPredicate> GetHasMissingPredicate(VectorType type) Contracts.Assert(_hasZeroDelegates.ContainsKey(t.RawKind)); del = _hasZeroDelegates[t.RawKind]; } - else if (!t.IsStandardScalar || !_hasNADelegates.TryGetValue(t.RawKind, out del)) + else if (!t.IsStandardScalar() || !_hasNADelegates.TryGetValue(t.RawKind, out del)) throw Contracts.Except("No HasMissing predicate for '{0}'", type); return (InPredicate>)del; diff --git a/src/Microsoft.ML.Data/Data/DataViewUtils.cs b/src/Microsoft.ML.Data/Data/DataViewUtils.cs index 1ee926378c..837980a8cf 100644 --- a/src/Microsoft.ML.Data/Data/DataViewUtils.cs +++ b/src/Microsoft.ML.Data/Data/DataViewUtils.cs @@ -199,7 +199,7 @@ public static bool AllCachable(Schema schema, Func predicate) /// public static bool IsCachable(this ColumnType type) { - return type != null && (type.IsPrimitive || type.IsVector); + return type != null && (type is PrimitiveType || type.IsVector); } /// @@ -859,7 +859,7 @@ public static OutPipe Create(ColumnType type, object pool) pipeType = typeof(ImplVec<>).MakeGenericType(type.ItemType.RawType); else { - Contracts.Assert(type.IsPrimitive); + Contracts.Assert(type is PrimitiveType); pipeType = typeof(ImplOne<>).MakeGenericType(type.RawType); } var constructor = pipeType.GetConstructor(new Type[] { typeof(object) }); diff --git 
a/src/Microsoft.ML.Data/Data/RowCursorUtils.cs b/src/Microsoft.ML.Data/Data/RowCursorUtils.cs index 7f6aeff35e..1e088a1636 100644 --- a/src/Microsoft.ML.Data/Data/RowCursorUtils.cs +++ b/src/Microsoft.ML.Data/Data/RowCursorUtils.cs @@ -46,13 +46,13 @@ private static Delegate GetGetterAsDelegateCore(Row row, int col) public static Delegate GetGetterAs(ColumnType typeDst, Row row, int col) { Contracts.CheckValue(typeDst, nameof(typeDst)); - Contracts.CheckParam(typeDst.IsPrimitive, nameof(typeDst)); + Contracts.CheckParam(typeDst is PrimitiveType, nameof(typeDst)); Contracts.CheckValue(row, nameof(row)); Contracts.CheckParam(0 <= col && col < row.Schema.Count, nameof(col)); Contracts.CheckParam(row.IsColumnActive(col), nameof(col), "column was not active"); var typeSrc = row.Schema[col].Type; - Contracts.Check(typeSrc.IsPrimitive, "Source column type must be primitive"); + Contracts.Check(typeSrc is PrimitiveType, "Source column type must be primitive"); Func> del = GetGetterAsCore; var methodInfo = del.GetMethodInfo().GetGenericMethodDefinition().MakeGenericMethod(typeSrc.RawType, typeDst.RawType); @@ -66,14 +66,14 @@ public static Delegate GetGetterAs(ColumnType typeDst, Row row, int col) public static ValueGetter GetGetterAs(ColumnType typeDst, Row row, int col) { Contracts.CheckValue(typeDst, nameof(typeDst)); - Contracts.CheckParam(typeDst.IsPrimitive, nameof(typeDst)); + Contracts.CheckParam(typeDst is PrimitiveType, nameof(typeDst)); Contracts.CheckParam(typeDst.RawType == typeof(TDst), nameof(typeDst)); Contracts.CheckValue(row, nameof(row)); Contracts.CheckParam(0 <= col && col < row.Schema.Count, nameof(col)); Contracts.CheckParam(row.IsColumnActive(col), nameof(col), "column was not active"); var typeSrc = row.Schema[col].Type; - Contracts.Check(typeSrc.IsPrimitive, "Source column type must be primitive"); + Contracts.Check(typeSrc is PrimitiveType, "Source column type must be primitive"); Func> del = GetGetterAsCore; var methodInfo = del.GetMethodInfo().GetGenericMethodDefinition().MakeGenericMethod(typeSrc.RawType, typeof(TDst)); @@ -118,7 +118,7 @@ public static ValueGetter GetGetterAsStringBuilder(Row row, int c Contracts.CheckParam(row.IsColumnActive(col), nameof(col), "column was not active"); var typeSrc = row.Schema[col].Type; - Contracts.Check(typeSrc.IsPrimitive, "Source column type must be primitive"); + Contracts.Check(typeSrc is PrimitiveType, "Source column type must be primitive"); return Utils.MarshalInvoke(GetGetterAsStringBuilderCore, typeSrc.RawType, typeSrc, row, col); } @@ -325,30 +325,6 @@ private static Func GetIsNewGroupDelegateCore(Row cursor, int col) }; } - [Obsolete("The usages of this appear to be based on a total misunderstanding of what Batch actually is. It is a mechanism " + - "to enable sharding and recovery of parallelized data, and has nothing to do with actual data.")] - [BestFriend] - internal static Func GetIsNewBatchDelegate(Row cursor, int batchSize) - { - Contracts.CheckParam(batchSize > 0, nameof(batchSize), "Batch size must be > 0"); - long lastNewBatchPosition = -1; - return () => - { - if (cursor.Position % batchSize != 0) - return false; - - // If the cursor just moved to a new batch, we need to return true. - if (lastNewBatchPosition != cursor.Position) - { - lastNewBatchPosition = cursor.Position; - return true; - } - - // The cursor is already in the new batch, if the condition is tested again, we need to return false. 
- return false; - }; - } - public static string TestGetLabelGetter(ColumnType type) { return TestGetLabelGetter(type, true); @@ -356,7 +332,7 @@ public static string TestGetLabelGetter(ColumnType type) public static string TestGetLabelGetter(ColumnType type, bool allowKeys) { - if (type == NumberType.R4 || type == NumberType.R8 || type.IsBool) + if (type == NumberType.R4 || type == NumberType.R8 || type is BoolType) return null; if (allowKeys && type.IsKey) @@ -394,7 +370,7 @@ private static ValueGetter GetLabelGetterNotFloat(Row cursor, int labelI Contracts.Assert(type != NumberType.R4 && type != NumberType.R8); // boolean type label mapping: True -> 1, False -> 0. - if (type.IsBool) + if (type is BoolType) { var getBoolSrc = cursor.GetGetter(labelIndex); return @@ -429,7 +405,7 @@ public static ValueGetter> GetLabelGetter(SlotCursor cursor) var type = cursor.GetSlotType().ItemType; if (type == NumberType.R4) return cursor.GetGetter(); - if (type == NumberType.R8 || type.IsBool) + if (type == NumberType.R8 || type is BoolType) return GetVecGetterAs(NumberType.R4, cursor); Contracts.Check(type.IsKey, "Only floating point number, boolean, and key type values can be used as label."); Contracts.Assert(TestGetLabelGetter(type) == null); diff --git a/src/Microsoft.ML.Data/DataLoadSave/DataOperations.cs b/src/Microsoft.ML.Data/DataLoadSave/DataOperations.cs index c07bd3b4de..7ae6effa91 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/DataOperations.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/DataOperations.cs @@ -14,6 +14,7 @@ namespace Microsoft.ML /// public sealed class DataOperations { + [BestFriend] internal IHostEnvironment Environment { get; } internal DataOperations(IHostEnvironment env) @@ -60,7 +61,7 @@ public IDataView FilterByColumn(IDataView input, string columnName, double lower Environment.CheckParam(lowerBound <= upperBound, nameof(upperBound), "Must be no less than lowerBound"); var type = input.Schema[columnName].Type; - if (!type.IsNumber) + if (!(type is NumberType)) throw Environment.ExceptSchemaMismatch(nameof(columnName), "filter", columnName, "number", type.ToString()); return new RangeFilter(Environment, input, columnName, lowerBound, upperBound, false); } diff --git a/src/Microsoft.ML.Data/DataLoadSave/EstimatorExtensions.cs b/src/Microsoft.ML.Data/DataLoadSave/EstimatorExtensions.cs index 6e557561b0..4c1dfe39cb 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/EstimatorExtensions.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/EstimatorExtensions.cs @@ -143,6 +143,7 @@ public static IEstimator WithOnFitDelegate(this IEst return new DelegateEstimator(estimator, onFit); } + [BestFriend] internal static T[] AppendElement(this T[] array, T element) { T[] result = new T[Utils.Size(array) + 1]; diff --git a/src/Microsoft.ML.Data/DataLoadSave/PartitionedFileLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/PartitionedFileLoader.cs index e93c5fdd8a..3999203e64 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/PartitionedFileLoader.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/PartitionedFileLoader.cs @@ -333,7 +333,7 @@ private Schema CreateSchema(IExceptionContext ectx, Column[] cols, IDataLoader s colSchema }; - return Schema.Create(new CompositeSchema(schemas)); + return new ZipBinding(schemas).OutputSchema; } } diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/LoadColumnAttribute.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/LoadColumnAttribute.cs index 8aefb527ea..9accb4a549 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/LoadColumnAttribute.cs +++ 
b/src/Microsoft.ML.Data/DataLoadSave/Text/LoadColumnAttribute.cs @@ -7,9 +7,6 @@ namespace Microsoft.ML.Data { - // REVIEW: The Start field is decorated with [Obsolete], and this warning disables using Obsolete for this class. - // The Start field should get deleted together with the Legacy API. -#pragma warning disable 618 /// /// Describes column information such as name and the source columns indices that this /// column encapsulates. @@ -21,11 +18,9 @@ public sealed class LoadColumnAttribute : Attribute /// Initializes new instance of . /// /// The index of the column in the text file. - // REVIEW: Remove calling the private constructor with just the start parameter, - // when the Legacy API's TextLoader gets deleted, and with it the Start field here. public LoadColumnAttribute(int columnIndex) - : this(columnIndex.ToString()) { + Sources = new List(); Sources.Add(new TextLoader.Range(columnIndex)); } @@ -34,11 +29,9 @@ public LoadColumnAttribute(int columnIndex) /// /// The starting column index, for the range. /// The ending column index, for the range. - // REVIEW: Calling the private constructor with just the start parameter, is incorrect, - // but it is just temporary there, until the Legacy API's TextLoader gets deleted, together with the Start field. public LoadColumnAttribute(int start, int end) - : this(start.ToString()) { + Sources = new List(); Sources.Add(new TextLoader.Range(start, end)); } @@ -46,27 +39,13 @@ public LoadColumnAttribute(int start, int end) /// Initializes new instance of . /// /// Distinct text file column indices to load as part of this column. - // REVIEW: Calling the private constructor with just the columnIndexes[0] parameter, is incorrect, - // but it is just temporary there, until the Legacy API's TextLoader gets deleted together with the Start field. public LoadColumnAttribute(int[] columnIndexes) - : this(columnIndexes[0].ToString()) // REVIEW: this is incorrect, but it is just temporary there, until the Legacy API's TextLoader gets deleted. { + Sources = new List(); foreach (var col in columnIndexes) Sources.Add(new TextLoader.Range(col)); } - [Obsolete("Should be deleted together with the Legacy project.")] - private LoadColumnAttribute(string start) - { - Sources = new List(); - Start = start; - } - internal List Sources; - - [Obsolete("Should be deleted together with the Legacy project.")] - [BestFriend] - internal string Start { get; } } -#pragma warning restore 618 } diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs index 9619e4d0d5..69407bfe98 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs @@ -405,7 +405,7 @@ internal static class DefaultArguments /// Used as an input column range. /// A variable length segment (extending to the end of the input line) is represented by Lim == SrcLim. /// - private struct Segment + internal struct Segment { public int Min; public int Lim; @@ -440,7 +440,7 @@ public Segment(int min) /// /// Information for an output column. /// - private sealed class ColInfo + internal sealed class ColInfo { public readonly string Name; // REVIEW: Fix this for keys. 
@@ -1361,6 +1361,8 @@ public void Save(ModelSaveContext ctx) public IDataView Read(string path) => Read(new MultiFileSource(path)); + public IDataView Read(params string[] path) => Read(new MultiFileSource(path)); + internal static TextLoader CreateTextReader(IHostEnvironment host, bool hasHeader = DefaultArguments.HasHeader, char separator = DefaultArguments.Separator, diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs index 5b71bcc95e..3002177fa0 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs @@ -70,7 +70,7 @@ private Func GetCreatorOneCore(PrimitiveType type) private Func GetCreatorOneCore(PrimitiveType type) { - Contracts.Assert(type.IsStandardScalar || type.IsKey); + Contracts.Assert(type.IsStandardScalar() || type.IsKey); Contracts.Assert(typeof(T) == type.RawType); var fn = _conv.GetTryParseConversion(type); return rows => new PrimitivePipe(rows, type, fn); @@ -84,7 +84,7 @@ private Func GetCreatorVecCore(PrimitiveType type) private Func GetCreatorVecCore(PrimitiveType type) { - Contracts.Assert(type.IsStandardScalar || type.IsKey); + Contracts.Assert(type.IsStandardScalar() || type.IsKey); Contracts.Assert(typeof(T) == type.RawType); var fn = _conv.GetTryParseConversion(type); return rows => new VectorPipe(rows, type, fn); diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextSaver.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextSaver.cs index e03816c715..68551d7230 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextSaver.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextSaver.cs @@ -86,11 +86,11 @@ private abstract class ValueWriterBase : ValueWriter protected ValueWriterBase(PrimitiveType type, int source, char sep) : base(source) { - Contracts.Assert(type.IsStandardScalar || type.IsKey); + Contracts.Assert(type.IsStandardScalar() || type.IsKey); Contracts.Assert(type.RawType == typeof(T)); Sep = sep; - if (type.IsText) + if (type is TextType) { // For text we need to deal with escaping. 
ValueMapper, StringBuilder> c = MapText; @@ -154,7 +154,7 @@ public VecValueWriter(RowCursor cursor, VectorType type, int source, char sep) ColumnType typeNames; if (type.IsKnownSizeVector && (typeNames = cursor.Schema[source].Metadata.Schema.GetColumnOrNull(MetadataUtils.Kinds.SlotNames)?.Type) != null && - typeNames.VectorSize == type.VectorSize && typeNames.ItemType.IsText) + typeNames.VectorSize == type.VectorSize && typeNames.ItemType is TextType) { cursor.Schema[source].Metadata.GetValue(MetadataUtils.Kinds.SlotNames, ref _slotNames); Contracts.Check(_slotNames.Length == typeNames.VectorSize, "Unexpected slot names length"); @@ -314,7 +314,7 @@ public static string SeparatorCharToString(char separator) public bool IsColumnSavable(ColumnType type) { var item = type.ItemType; - return item.IsStandardScalar || item.IsKey; + return item.IsStandardScalar() || item.IsKey; } public void SaveData(Stream stream, IDataView data, params int[] cols) @@ -407,7 +407,7 @@ private void WriteDataCore(IChannel ch, TextWriter writer, IDataView data, if (!type.IsKnownSizeVector) continue; var typeNames = data.Schema[cols[i]].Metadata.Schema.GetColumnOrNull(MetadataUtils.Kinds.SlotNames)?.Type; - if (typeNames != null && typeNames.VectorSize == type.VectorSize && typeNames.ItemType.IsText) + if (typeNames != null && typeNames.VectorSize == type.VectorSize && typeNames.ItemType is TextType) hasHeader = true; } } diff --git a/src/Microsoft.ML.Data/DataView/CacheDataView.cs b/src/Microsoft.ML.Data/DataView/CacheDataView.cs index 2563fe5cc1..a41aabd01c 100644 --- a/src/Microsoft.ML.Data/DataView/CacheDataView.cs +++ b/src/Microsoft.ML.Data/DataView/CacheDataView.cs @@ -1369,7 +1369,7 @@ public static ColumnCache Create(CacheDataView parent, RowCursor input, int srcC pipeType = typeof(ImplVec<>).MakeGenericType(type.ItemType.RawType); else { - host.Assert(type.IsPrimitive); + host.Assert(type is PrimitiveType); pipeType = typeof(ImplOne<>).MakeGenericType(type.RawType); } if (_pipeConstructorTypes == null) diff --git a/src/Microsoft.ML.Data/DataView/DataViewConstructionUtils.cs b/src/Microsoft.ML.Data/DataView/DataViewConstructionUtils.cs index 4fb1cc7f30..16ae6b0744 100644 --- a/src/Microsoft.ML.Data/DataView/DataViewConstructionUtils.cs +++ b/src/Microsoft.ML.Data/DataView/DataViewConstructionUtils.cs @@ -168,7 +168,7 @@ private Delegate CreateGetter(ColumnType colType, InternalSchemaDefinition.Colum // String[] -> ReadOnlyMemory if (outputType.GetElementType() == typeof(string)) { - Host.Assert(colType.ItemType.IsText); + Host.Assert(colType.ItemType is TextType); return CreateConvertingArrayGetterDelegate>(peek, x => x != null ? x.AsMemory() : ReadOnlyMemory.Empty); } @@ -190,12 +190,12 @@ private Delegate CreateGetter(ColumnType colType, InternalSchemaDefinition.Colum del = CreateDirectVBufferGetterDelegate; genericType = colType.ItemType.RawType; } - else if (colType.IsPrimitive) + else if (colType is PrimitiveType) { if (outputType == typeof(string)) { // String -> ReadOnlyMemory - Host.Assert(colType.IsText); + Host.Assert(colType is TextType); return CreateConvertingGetterDelegate>(peek, x => x != null ? x.AsMemory() : ReadOnlyMemory.Empty); } @@ -644,11 +644,8 @@ protected override bool MoveManyCore(long count) /// /// An in-memory data view based on the IEnumerable of data. /// Doesn't support shuffling. - /// - /// This class is public because prediction engine wants to call its - /// for performance reasons. 
/// - public sealed class StreamingDataView : DataViewBase + internal sealed class StreamingDataView : DataViewBase where TRow : class { private IEnumerable _data; @@ -675,18 +672,6 @@ public override RowCursor GetRowCursor(Func predicate, Random rand = return new WrappedCursor (new Cursor(Host, this, predicate)); } - /// - /// Since all the cursors only depend on an enumerator (rather than the data itself), - /// it's safe to 'swap' the data inside the streaming data view. This doesn't affect - /// the current 'live' cursors, only the ones that will be created later. - /// This is used for efficiency in . - /// - public void SetData(IEnumerable data) - { - Contracts.CheckValue(data, nameof(data)); - _data = data; - } - private sealed class Cursor : DataViewCursorBase { private readonly IEnumerator _enumerator; @@ -932,12 +917,12 @@ public override ValueGetter GetGetter() .MakeGenericMethod(MetadataType.ItemType.RawType) .Invoke(this, new object[] { }) as ValueGetter; } - if (MetadataType.IsPrimitive) + if (MetadataType is PrimitiveType) { if (typeT == typeof(string)) { // String -> ReadOnlyMemory - Contracts.Assert(MetadataType.IsText); + Contracts.Assert(MetadataType is TextType); ValueGetter> m = GetString; return m as ValueGetter; } diff --git a/src/Microsoft.ML.Data/DataView/Transposer.cs b/src/Microsoft.ML.Data/DataView/Transposer.cs index b7e3648445..90279bb5e5 100644 --- a/src/Microsoft.ML.Data/DataView/Transposer.cs +++ b/src/Microsoft.ML.Data/DataView/Transposer.cs @@ -416,7 +416,7 @@ public SlotCursorOne(Transposer parent, int col) Ch.Assert(parent._splitLim[iinfo] - _col == 1); } Ch.AssertValue(_view); - Ch.Assert(_view.Schema[_col].Type.IsPrimitive); + Ch.Assert(_view.Schema[_col].Type is PrimitiveType); Ch.Assert(_view.Schema[_col].Type.RawType == typeof(T)); _len = parent.RowCount; } @@ -1015,7 +1015,7 @@ protected Splitter(IDataView view, int col) public static Splitter Create(IDataView view, int col) { var type = view.Schema[col].Type; - Contracts.Assert(type.IsPrimitive || type.VectorSize > 0); + Contracts.Assert(type is PrimitiveType || type.VectorSize > 0); const int defaultSplitThreshold = 16; if (type.VectorSize <= defaultSplitThreshold) return Utils.MarshalInvoke(CreateCore, type.RawType, view, col); diff --git a/src/Microsoft.ML.Data/DataView/TypedCursor.cs b/src/Microsoft.ML.Data/DataView/TypedCursor.cs index 724a19ff29..f226b461e5 100644 --- a/src/Microsoft.ML.Data/DataView/TypedCursor.cs +++ b/src/Microsoft.ML.Data/DataView/TypedCursor.cs @@ -274,7 +274,7 @@ private Action GenerateSetter(Row input, int index, InternalSchemaDefiniti // VBuffer> -> String[] if (fieldType.GetElementType() == typeof(string)) { - Ch.Assert(colType.ItemType.IsText); + Ch.Assert(colType.ItemType is TextType); return CreateConvertingVBufferSetter, string>(input, index, poke, peek, x => x.ToString()); } @@ -296,12 +296,12 @@ private Action GenerateSetter(Row input, int index, InternalSchemaDefiniti del = CreateVBufferToVBufferSetter; genericType = colType.ItemType.RawType; } - else if (colType.IsPrimitive) + else if (colType is PrimitiveType) { if (fieldType == typeof(string)) { // ReadOnlyMemory -> String - Ch.Assert(colType.IsText); + Ch.Assert(colType is TextType); Ch.Assert(peek == null); return CreateConvertingActionSetter, string>(input, index, poke, x => x.ToString()); } diff --git a/src/Microsoft.ML.Data/DataView/CompositeSchema.cs b/src/Microsoft.ML.Data/DataView/ZipBinding.cs similarity index 61% rename from src/Microsoft.ML.Data/DataView/CompositeSchema.cs rename to 
src/Microsoft.ML.Data/DataView/ZipBinding.cs index 2d4ca1a49c..a2eea6c60c 100644 --- a/src/Microsoft.ML.Data/DataView/CompositeSchema.cs +++ b/src/Microsoft.ML.Data/DataView/ZipBinding.cs @@ -3,8 +3,6 @@ // See the LICENSE file in the project root for more information. using System; -using System.Collections.Generic; -using System.Linq; using Microsoft.ML.Internal.Utilities; namespace Microsoft.ML.Data @@ -13,16 +11,16 @@ namespace Microsoft.ML.Data /// A convenience class for concatenating several schemas together. /// This would be necessary when combining IDataViews through any type of combining operation, for example, zip. /// - internal sealed class CompositeSchema : ISchema + internal sealed class ZipBinding { private readonly Schema[] _sources; - public Schema AsSchema { get; } + public Schema OutputSchema { get; } // Zero followed by cumulative column counts. Zero being used for the empty case. private readonly int[] _cumulativeColCounts; - public CompositeSchema(Schema[] sources) + public ZipBinding(Schema[] sources) { Contracts.AssertNonEmpty(sources); _sources = sources; @@ -34,7 +32,11 @@ public CompositeSchema(Schema[] sources) var schema = sources[i]; _cumulativeColCounts[i + 1] = _cumulativeColCounts[i] + schema.Count; } - AsSchema = Schema.Create(this); + + var schemaBuilder = new SchemaBuilder(); + foreach (var sourceSchema in sources) + schemaBuilder.AddColumns(sourceSchema); + OutputSchema = schemaBuilder.GetSchema(); } public int ColumnCount => _cumulativeColCounts[_cumulativeColCounts.Length - 1]; @@ -74,50 +76,5 @@ public void GetColumnSource(int col, out int srcIndex, out int srcCol) srcCol = col - _cumulativeColCounts[srcIndex]; Contracts.Assert(0 <= srcCol && srcCol < _sources[srcIndex].Count); } - - public bool TryGetColumnIndex(string name, out int col) - { - for (int i = _sources.Length; --i >= 0;) - { - if (_sources[i].TryGetColumnIndex(name, out col)) - { - col += _cumulativeColCounts[i]; - return true; - } - } - - col = -1; - return false; - } - - public string GetColumnName(int col) - { - GetColumnSource(col, out int dv, out int srcCol); - return _sources[dv][srcCol].Name; - } - - public ColumnType GetColumnType(int col) - { - GetColumnSource(col, out int dv, out int srcCol); - return _sources[dv][srcCol].Type; - } - - public IEnumerable> GetMetadataTypes(int col) - { - GetColumnSource(col, out int dv, out int srcCol); - return _sources[dv][srcCol].Metadata.Schema.Select(c => new KeyValuePair(c.Name, c.Type)); - } - - public ColumnType GetMetadataTypeOrNull(string kind, int col) - { - GetColumnSource(col, out int dv, out int srcCol); - return _sources[dv][srcCol].Metadata.Schema.GetColumnOrNull(kind)?.Type; - } - - public void GetMetadata(string kind, int col, ref TValue value) - { - GetColumnSource(col, out int dv, out int srcCol); - _sources[dv][srcCol].Metadata.GetValue(kind, ref value); - } } } diff --git a/src/Microsoft.ML.Data/DataView/ZipDataView.cs b/src/Microsoft.ML.Data/DataView/ZipDataView.cs index 827b47f724..ecf0e280aa 100644 --- a/src/Microsoft.ML.Data/DataView/ZipDataView.cs +++ b/src/Microsoft.ML.Data/DataView/ZipDataView.cs @@ -25,7 +25,7 @@ public sealed class ZipDataView : IDataView private readonly IHost _host; private readonly IDataView[] _sources; - private readonly CompositeSchema _compositeSchema; + private readonly ZipBinding _zipBinding; public static IDataView Create(IHostEnvironment env, IEnumerable sources) { @@ -47,12 +47,12 @@ private ZipDataView(IHost host, IDataView[] sources) _host.Assert(Utils.Size(sources) > 1); _sources = 
sources; - _compositeSchema = new CompositeSchema(_sources.Select(x => x.Schema).ToArray()); + _zipBinding = new ZipBinding(_sources.Select(x => x.Schema).ToArray()); } public bool CanShuffle { get { return false; } } - public Schema Schema => _compositeSchema.AsSchema; + public Schema Schema => _zipBinding.OutputSchema; public long? GetRowCount() { @@ -75,7 +75,7 @@ public RowCursor GetRowCursor(Func predicate, Random rand = null) _host.CheckValue(predicate, nameof(predicate)); _host.CheckValueOrNull(rand); - var srcPredicates = _compositeSchema.GetInputPredicates(predicate); + var srcPredicates = _zipBinding.GetInputPredicates(predicate); // REVIEW: if we know the row counts, we could only open cursor if it has needed columns, and have the // outer cursor handle the early stopping. If we don't know row counts, we need to open all the cursors because @@ -106,7 +106,7 @@ public RowCursor[] GetRowCursorSet(Func predicate, int n, Random rand private sealed class Cursor : RootCursorBase { private readonly RowCursor[] _cursors; - private readonly CompositeSchema _compositeSchema; + private readonly ZipBinding _zipBinding; private readonly bool[] _isColumnActive; private bool _disposed; @@ -119,8 +119,8 @@ public Cursor(ZipDataView parent, RowCursor[] srcCursors, Func predic Ch.AssertValue(predicate); _cursors = srcCursors; - _compositeSchema = parent._compositeSchema; - _isColumnActive = Utils.BuildArray(_compositeSchema.ColumnCount, predicate); + _zipBinding = parent._zipBinding; + _isColumnActive = Utils.BuildArray(_zipBinding.ColumnCount, predicate); } protected override void Dispose(bool disposing) @@ -172,11 +172,11 @@ protected override bool MoveManyCore(long count) return true; } - public override Schema Schema => _compositeSchema.AsSchema; + public override Schema Schema => _zipBinding.OutputSchema; public override bool IsColumnActive(int col) { - _compositeSchema.CheckColumnInRange(col); + _zipBinding.CheckColumnInRange(col); return _isColumnActive[col]; } @@ -184,7 +184,7 @@ public override ValueGetter GetGetter(int col) { int dv; int srcCol; - _compositeSchema.GetColumnSource(col, out dv, out srcCol); + _zipBinding.GetColumnSource(col, out dv, out srcCol); return _cursors[dv].GetGetter(srcCol); } } diff --git a/src/Microsoft.ML.Data/Evaluators/EvaluatorBase.cs b/src/Microsoft.ML.Data/Evaluators/EvaluatorBase.cs index 217f6751cc..7f1d40c176 100644 --- a/src/Microsoft.ML.Data/Evaluators/EvaluatorBase.cs +++ b/src/Microsoft.ML.Data/Evaluators/EvaluatorBase.cs @@ -365,7 +365,7 @@ internal static AggregatorDictionaryBase Create(RoleMappedSchema schema, string Contracts.AssertNonWhiteSpace(stratCol); Contracts.AssertValue(createAgg); - if (stratType.KeyCount == 0 && !stratType.IsText) + if (stratType.KeyCount == 0 && !(stratType is TextType)) { throw Contracts.ExceptUserArg(nameof(MamlEvaluatorBase.ArgumentsBase.StratColumn), "Stratification column '{0}' has type '{1}', but must be a known count key or text", stratCol, stratType); diff --git a/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs b/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs index 8a3c61705d..fadf80dfa6 100644 --- a/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs +++ b/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs @@ -84,7 +84,7 @@ public static IMamlEvaluator GetEvaluator(IHostEnvironment env, Schema schema) private static bool CheckScoreColumnKindIsKnown(Schema schema, int col) { var columnType = schema[col].Metadata.Schema.GetColumnOrNull(MetadataUtils.Kinds.ScoreColumnKind)?.Type; - if (columnType == null 
|| !columnType.IsText) + if (columnType == null || !(columnType is TextType)) return false; ReadOnlyMemory tmp = default; schema[col].Metadata.GetValue(MetadataUtils.Kinds.ScoreColumnKind, ref tmp); @@ -96,7 +96,7 @@ private static bool CheckScoreColumnKindIsKnown(Schema schema, int col) private static bool CheckScoreColumnKind(Schema schema, int col) { var columnType = schema[col].Metadata.Schema.GetColumnOrNull(MetadataUtils.Kinds.ScoreColumnKind)?.Type; - return columnType != null && columnType.IsText; + return columnType != null && columnType is TextType; } /// @@ -219,7 +219,7 @@ private static bool IsScoreColumnKind(IExceptionContext ectx, Schema schema, int ectx.CheckNonEmpty(kind, nameof(kind)); var type = schema[col].Metadata.Schema.GetColumnOrNull(MetadataUtils.Kinds.ScoreColumnKind)?.Type; - if (type == null || !type.IsText) + if (type == null || !(type is TextType)) return false; var tmp = default(ReadOnlyMemory); schema[col].Metadata.GetValue(MetadataUtils.Kinds.ScoreColumnKind, ref tmp); @@ -346,7 +346,7 @@ public static IEnumerable> GetMetrics(IDataView met VBuffer> names = default; var size = schema[i].Type.VectorSize; var slotNamesType = schema[i].Metadata.Schema.GetColumnOrNull(MetadataUtils.Kinds.SlotNames)?.Type; - if (slotNamesType != null && slotNamesType.VectorSize == size && slotNamesType.ItemType.IsText) + if (slotNamesType != null && slotNamesType.VectorSize == size && slotNamesType.ItemType is TextType) schema[i].Metadata.GetValue(MetadataUtils.Kinds.SlotNames, ref names); else { @@ -997,7 +997,7 @@ private static List GetMetricNames(IChannel ch, Schema schema, Row row, var type = schema[i].Type; var metricName = row.Schema[i].Name; - if (type.IsNumber) + if (type is NumberType) { getters[i] = RowCursorUtils.GetGetterAs(NumberType.R8, row, i); metricNames.Add(metricName); @@ -1014,7 +1014,7 @@ private static List GetMetricNames(IChannel ch, Schema schema, Row row, vBufferGetters[i] = row.GetGetter>(i); metricCount += type.VectorSize; var slotNamesType = schema[i].Metadata.Schema.GetColumnOrNull(MetadataUtils.Kinds.SlotNames)?.Type; - if (slotNamesType != null && slotNamesType.VectorSize == type.VectorSize && slotNamesType.ItemType.IsText) + if (slotNamesType != null && slotNamesType.VectorSize == type.VectorSize && slotNamesType.ItemType is TextType) schema[i].Metadata.GetValue(MetadataUtils.Kinds.SlotNames, ref names); else { @@ -1214,7 +1214,7 @@ internal static IDataView GetAverageToDataView(IHostEnvironment env, Schema sche if (i == stratCol) { var keyValuesType = schema[i].Metadata.Schema.GetColumnOrNull(MetadataUtils.Kinds.KeyValues)?.Type; - if (keyValuesType == null || !keyValuesType.ItemType.IsText || + if (keyValuesType == null || !(keyValuesType.ItemType is TextType) || keyValuesType.VectorSize != type.KeyCount) { throw env.Except("Column '{0}' must have key values metadata", @@ -1251,7 +1251,7 @@ internal static IDataView GetAverageToDataView(IHostEnvironment env, Schema sche dvBldr.AddColumn(MetricKinds.ColumnNames.FoldIndex, TextType.Instance, foldVals); weightedDvBldr?.AddColumn(MetricKinds.ColumnNames.FoldIndex, TextType.Instance, foldVals); } - else if (type.IsNumber) + else if (type is NumberType) { dvBldr.AddScalarColumn(schema, agg, hasStdev, numFolds, iMetric); weightedDvBldr?.AddScalarColumn(schema, weightedAgg, hasStdev, numFolds, iMetric); @@ -1343,7 +1343,7 @@ public static string GetConfusionTable(IHost host, IDataView confusionDataView, int countCol; host.Check(confusionDataView.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.Count, 
out countCol), "Did not find the count column"); var type = confusionDataView.Schema[countCol].Metadata.Schema.GetColumnOrNull(MetadataUtils.Kinds.SlotNames)?.Type; - host.Check(type != null && type.IsKnownSizeVector && type.ItemType.IsText, "The Count column does not have a text vector metadata of kind SlotNames."); + host.Check(type != null && type.IsKnownSizeVector && type.ItemType is TextType, "The Count column does not have a text vector metadata of kind SlotNames."); var labelNames = default(VBuffer>); confusionDataView.Schema[countCol].Metadata.GetValue(MetadataUtils.Kinds.SlotNames, ref labelNames); @@ -1687,7 +1687,7 @@ public static void PrintWarnings(IChannel ch, Dictionary metr if (metrics.TryGetValue(MetricKinds.Warnings, out warnings)) { int col; - if (warnings.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.WarningText, out col) && warnings.Schema[col].Type.IsText) + if (warnings.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.WarningText, out col) && warnings.Schema[col].Type is TextType) { using (var cursor = warnings.GetRowCursor(c => c == col)) { diff --git a/src/Microsoft.ML.Data/Evaluators/MultiClassClassifierEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/MultiClassClassifierEvaluator.cs index a59d785aa2..cdf973137e 100644 --- a/src/Microsoft.ML.Data/Evaluators/MultiClassClassifierEvaluator.cs +++ b/src/Microsoft.ML.Data/Evaluators/MultiClassClassifierEvaluator.cs @@ -99,7 +99,7 @@ private ReadOnlyMemory[] GetClassNames(RoleMappedSchema schema) var scoreInfo = schema.GetUniqueColumn(MetadataUtils.Const.ScoreValueKind.Score); var mdType = schema.Schema[scoreInfo.Index].Metadata.Schema.GetColumnOrNull(MetadataUtils.Kinds.SlotNames)?.Type; var labelNames = default(VBuffer>); - if (mdType != null && mdType.IsKnownSizeVector && mdType.ItemType.IsText) + if (mdType != null && mdType.IsKnownSizeVector && mdType.ItemType is TextType) { schema.Schema[scoreInfo.Index].Metadata.GetValue(MetadataUtils.Kinds.SlotNames, ref labelNames); names = new ReadOnlyMemory[labelNames.Length]; diff --git a/src/Microsoft.ML.Data/Evaluators/MultiOutputRegressionEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/MultiOutputRegressionEvaluator.cs index 57e0629418..fef39899b0 100644 --- a/src/Microsoft.ML.Data/Evaluators/MultiOutputRegressionEvaluator.cs +++ b/src/Microsoft.ML.Data/Evaluators/MultiOutputRegressionEvaluator.cs @@ -591,7 +591,7 @@ private void GetScoreValueKind(ref ReadOnlyMemory dst) private ValueGetter>> CreateSlotNamesGetter(Schema schema, int column, int length, string prefix) { var type = schema[column].Metadata.Schema.GetColumnOrNull(MetadataUtils.Kinds.SlotNames)?.Type; - if (type != null && type.IsText) + if (type != null && type is TextType) { return (ref VBuffer> dst) => schema[column].Metadata.GetValue(MetadataUtils.Kinds.SlotNames, ref dst); diff --git a/src/Microsoft.ML.Data/Evaluators/QuantileRegressionEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/QuantileRegressionEvaluator.cs index 10f109c4ce..3acd90bd91 100644 --- a/src/Microsoft.ML.Data/Evaluators/QuantileRegressionEvaluator.cs +++ b/src/Microsoft.ML.Data/Evaluators/QuantileRegressionEvaluator.cs @@ -44,7 +44,7 @@ private protected override IRowMapper CreatePerInstanceRowMapper(RoleMappedSchem var scoreInfo = schema.GetUniqueColumn(MetadataUtils.Const.ScoreValueKind.Score); int scoreSize = scoreInfo.Type.VectorSize; var type = schema.Schema[scoreInfo.Index].Metadata.Schema.GetColumnOrNull(MetadataUtils.Kinds.SlotNames)?.Type; - Host.Check(type != null && type.IsKnownSizeVector && type.ItemType.IsText, "Quantile 
regression score column must have slot names"); + Host.Check(type != null && type.IsKnownSizeVector && type.ItemType is TextType, "Quantile regression score column must have slot names"); var quantiles = default(VBuffer>); schema.Schema[scoreInfo.Index].Metadata.GetValue(MetadataUtils.Kinds.SlotNames, ref quantiles); Host.Assert(quantiles.IsDense && quantiles.Length == scoreSize); @@ -71,7 +71,7 @@ private protected override Aggregator GetAggregatorCore(RoleMappedSchema schema, Host.Assert(t.VectorSize > 0 && (t.ItemType == NumberType.R4 || t.ItemType == NumberType.R8)); var slotNames = default(VBuffer>); t = schema.Schema[scoreInfo.Index].Metadata.Schema.GetColumnOrNull(MetadataUtils.Kinds.SlotNames)?.Type; - if (t != null && t.VectorSize == scoreInfo.Type.VectorSize && t.ItemType.IsText) + if (t != null && t.VectorSize == scoreInfo.Type.VectorSize && t.ItemType is TextType) schema.Schema[scoreInfo.Index].GetSlotNames(ref slotNames); return new Aggregator(Host, LossFunction, schema.Weight != null, scoreInfo.Type.VectorSize, in slotNames, stratName); } diff --git a/src/Microsoft.ML.Data/Model/ModelOperationsCatalog.cs b/src/Microsoft.ML.Data/Model/ModelOperationsCatalog.cs index aa8b722147..0f03d5f8f8 100644 --- a/src/Microsoft.ML.Data/Model/ModelOperationsCatalog.cs +++ b/src/Microsoft.ML.Data/Model/ModelOperationsCatalog.cs @@ -13,6 +13,10 @@ namespace Microsoft.ML /// public sealed class ModelOperationsCatalog { + /// + /// This is a best friend because an extension method defined in another assembly needs this field. + /// + [BestFriend] internal IHostEnvironment Environment { get; } public ExplainabilityTransforms Explainability { get; } @@ -33,7 +37,6 @@ protected SubCatalogBase(ModelOperationsCatalog owner) { Environment = owner.Environment; } - } /// diff --git a/src/Microsoft.ML.Data/Model/Pfa/PfaUtils.cs b/src/Microsoft.ML.Data/Model/Pfa/PfaUtils.cs index 9215e233e0..6d099cd63d 100644 --- a/src/Microsoft.ML.Data/Model/Pfa/PfaUtils.cs +++ b/src/Microsoft.ML.Data/Model/Pfa/PfaUtils.cs @@ -173,7 +173,7 @@ private static JToken PfaTypeOrNullCore(ColumnType itemType) { Contracts.AssertValue(itemType); - if (!itemType.IsPrimitive) + if (!(itemType is PrimitiveType)) return null; if (itemType.IsKey) diff --git a/src/Microsoft.ML.Data/Prediction/Calibrator.cs b/src/Microsoft.ML.Data/Prediction/Calibrator.cs index d8b1de1cc8..a15224c74c 100644 --- a/src/Microsoft.ML.Data/Prediction/Calibrator.cs +++ b/src/Microsoft.ML.Data/Prediction/Calibrator.cs @@ -537,7 +537,7 @@ public Bound(IHostEnvironment env, SchemaBindableCalibratedPredictor parent, Rol if (!_predictor.OutputSchema.TryGetColumnIndex(MetadataUtils.Const.ScoreValueKind.Score, out _scoreCol)) throw env.Except("Predictor does not output a score"); var scoreType = _predictor.OutputSchema[_scoreCol].Type; - env.Check(!scoreType.IsVector && scoreType.IsNumber); + env.Check(!scoreType.IsVector && scoreType is NumberType); OutputSchema = Schema.Create(new BinaryClassifierSchema()); } diff --git a/src/Microsoft.ML.Data/Prediction/PredictionEngine.cs b/src/Microsoft.ML.Data/Prediction/PredictionEngine.cs index d4fede17f7..47ce780dcb 100644 --- a/src/Microsoft.ML.Data/Prediction/PredictionEngine.cs +++ b/src/Microsoft.ML.Data/Prediction/PredictionEngine.cs @@ -10,73 +10,6 @@ namespace Microsoft.ML { - // REVIEW: Temporarly moving here since it is used by the Legacy project. Remove when removing the legacy project. 
- /// - /// A class that runs the previously trained model (and the preceding transform pipeline) on the - /// in-memory data in batch mode. - /// This can also be used with trained pipelines that do not end with a predictor: in this case, the - /// 'prediction' will be just the outcome of all the transformations. - /// - /// The user-defined type that holds the example. - /// The user-defined type that holds the prediction. - [BestFriend] - internal sealed class BatchPredictionEngine - where TSrc : class - where TDst : class, new() - { - // The source data view. - private readonly DataViewConstructionUtils.StreamingDataView _srcDataView; - // The transformation engine. - private readonly PipeEngine _pipeEngine; - - internal BatchPredictionEngine(IHostEnvironment env, Stream modelStream, bool ignoreMissingColumns, - SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null) - { - Contracts.AssertValue(env); - Contracts.AssertValue(modelStream); - Contracts.AssertValueOrNull(inputSchemaDefinition); - Contracts.AssertValueOrNull(outputSchemaDefinition); - - // Initialize pipe. - _srcDataView = DataViewConstructionUtils.CreateFromEnumerable(env, new TSrc[] { }, inputSchemaDefinition); - var pipe = DataViewConstructionUtils.LoadPipeWithPredictor(env, modelStream, _srcDataView); - _pipeEngine = new PipeEngine(env, pipe, ignoreMissingColumns, outputSchemaDefinition); - } - - internal BatchPredictionEngine(IHostEnvironment env, IDataView dataPipeline, bool ignoreMissingColumns, - SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null) - { - Contracts.AssertValue(env); - Contracts.AssertValue(dataPipeline); - Contracts.AssertValueOrNull(inputSchemaDefinition); - Contracts.AssertValueOrNull(outputSchemaDefinition); - - // Initialize pipe. - _srcDataView = DataViewConstructionUtils.CreateFromEnumerable(env, new TSrc[] { }, inputSchemaDefinition); - var pipe = ApplyTransformUtils.ApplyAllTransformsToData(env, dataPipeline, _srcDataView); - - _pipeEngine = new PipeEngine(env, pipe, ignoreMissingColumns, outputSchemaDefinition); - } - - /// - /// Run the prediction pipe. This will enumerate the exactly once, - /// cache all the examples (by reference) into its internal representation and then run - /// the transformation pipe. - /// - /// The examples to run the prediction on. - /// If true, the engine will not allocate memory per output, and - /// the returned objects will actually always be the same object. The user is - /// expected to clone the values himself if needed. - /// The that contains all the pipeline results. - public IEnumerable Predict(IEnumerable examples, bool reuseRowObjects) - { - Contracts.CheckValue(examples, nameof(examples)); - - _pipeEngine.Reset(); - _srcDataView.SetData(examples); - return _pipeEngine.RunPipe(reuseRowObjects); - } - } /// /// Utility class to run the pipeline to completion and produce a strongly-typed IEnumerable as a result. 
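With `BatchPredictionEngine` removed above, batch scoring goes through the ordinary transformer pipeline, and the strongly typed single-example path is the `PredictionEngine` mentioned in the release notes. The sketch below is illustrative only; `MLContext`, `CreateStreamingDataView`, `CreatePredictionEngine`, and the `InputRow`/`OutputRow` classes are assumptions about the ML.NET 0.9 public surface, not code from this change.

```
// A minimal sketch, assuming the 0.9-era MLContext, CreateStreamingDataView and
// CreatePredictionEngine helpers; InputRow/OutputRow are illustrative placeholder types.
using System.Collections.Generic;
using Microsoft.ML;
using Microsoft.ML.Data;

public class InputRow { public float Feature1; public float Feature2; }
public class OutputRow { public float Score; }

public static class ScoringSketch
{
    // Batch scoring: wrap the in-memory examples in an IDataView and run the fitted
    // transformer chain over it (this replaces BatchPredictionEngine.Predict).
    public static IDataView ScoreBatch(MLContext mlContext, ITransformer model, IEnumerable<InputRow> examples)
    {
        IDataView input = mlContext.CreateStreamingDataView(examples);
        return model.Transform(input);
    }

    // Single-example scoring: the strongly typed path is now PredictionEngine.
    public static OutputRow ScoreOne(MLContext mlContext, ITransformer model, InputRow example)
    {
        var engine = model.CreatePredictionEngine<InputRow, OutputRow>(mlContext);
        return engine.Predict(example);
    }
}
```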
diff --git a/src/Microsoft.ML.Data/Properties/AssemblyInfo.cs b/src/Microsoft.ML.Data/Properties/AssemblyInfo.cs index 43587d4544..19f852bdfe 100644 --- a/src/Microsoft.ML.Data/Properties/AssemblyInfo.cs +++ b/src/Microsoft.ML.Data/Properties/AssemblyInfo.cs @@ -14,7 +14,6 @@ [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.TimeSeries.Tests" + PublicKey.TestValue)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.EntryPoints" + PublicKey.Value)] -[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Legacy" + PublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Maml" + PublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.ResultProcessor" + PublicKey.Value)] @@ -40,4 +39,41 @@ [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.StaticPipe" + PublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Internal.MetaLinearLearner" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "TMSNlearnPrediction" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.CntkWrapper" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.DssmFeaturizer" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.DssmTrigram" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.EdgeML" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Internal.FastTree" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.Garage" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.HelperCommands" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.Internal.ImageAnalytics" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.InferNetWrapper" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.LDSVM" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "LibSvmWrapper" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Internal.RecipeInference" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "RunTests" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.PyTrainer" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "ParameterMixer" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Internal.Api" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.TextAnalytics" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "StratoLearner" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.SequencePrediction" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.RServerScoring.TextAnalytics" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.NeuralNetworks" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.RServerScoring.NeuralNetworks" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.Sar" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.OcrTransform" + InternalPublicKey.Value)] 
+[assembly: InternalsVisibleTo(assemblyName: "TreeVisualizer" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.TlcAzurePublish" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.XGBoost" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.SLib" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.VowpalWabbit" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.RServerScoring" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.TlcCustomModule" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.Scope" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "TLC" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "RunTestsMore" + InternalPublicKey.Value)] + [assembly: WantsToBeBestFriends] diff --git a/src/Microsoft.ML.Data/Scorers/FeatureContributionCalculation.cs b/src/Microsoft.ML.Data/Scorers/FeatureContributionCalculation.cs index cc9e4cce85..21e30f6935 100644 --- a/src/Microsoft.ML.Data/Scorers/FeatureContributionCalculation.cs +++ b/src/Microsoft.ML.Data/Scorers/FeatureContributionCalculation.cs @@ -337,7 +337,7 @@ public RowMapper(IHostEnvironment env, BindableMapper parent, RoleMappedSchema s } _outputGenericSchema = _genericRowMapper.OutputSchema; - OutputSchema = new CompositeSchema(new Schema[] { _outputGenericSchema, _outputSchema, }).AsSchema; + OutputSchema = new ZipBinding(new Schema[] { _outputGenericSchema, _outputSchema, }).OutputSchema; } /// diff --git a/src/Microsoft.ML.Data/Scorers/MultiClassClassifierScorer.cs b/src/Microsoft.ML.Data/Scorers/MultiClassClassifierScorer.cs index 52e963b3a2..82247b3d0d 100644 --- a/src/Microsoft.ML.Data/Scorers/MultiClassClassifierScorer.cs +++ b/src/Microsoft.ML.Data/Scorers/MultiClassClassifierScorer.cs @@ -75,6 +75,8 @@ public sealed class LabelNameBindableMapper : ISchemaBindableMapper, ICanSaveMod private readonly IHost _host; private readonly Func _canWrap; + internal ISchemaBindableMapper Bindable => _bindable; + public VectorType Type => _type; bool ICanSavePfa.CanSavePfa => (_bindable as ICanSavePfa)?.CanSavePfa == true; bool ICanSaveOnnx.CanSaveOnnx(OnnxContext ctx) => (_bindable as ICanSaveOnnx)?.CanSaveOnnx(ctx) == true; @@ -192,6 +194,11 @@ private void SaveCore(ModelSaveContext ctx) throw _host.Except("We do not know how to serialize label names of type '{0}'", _type.ItemType); } + internal ISchemaBindableMapper Clone(ISchemaBindableMapper inner) + { + return new LabelNameBindableMapper(_host, inner, _type, _getter, _metadataKind, _canWrap); + } + void IBindableCanSavePfa.SaveAsPfa(BoundPfaContext ctx, RoleMappedSchema schema, string[] outputNames) { Contracts.CheckValue(ctx, nameof(ctx)); @@ -392,7 +399,7 @@ private static ISchemaBoundMapper WrapIfNeeded(IHostEnvironment env, ISchemaBoun /// from the model of a bindable mapper) /// Whether we can call with /// this mapper and expect it to succeed - private static bool CanWrap(ISchemaBoundMapper mapper, ColumnType labelNameType) + internal static bool CanWrap(ISchemaBoundMapper mapper, ColumnType labelNameType) { Contracts.AssertValue(mapper); Contracts.AssertValue(labelNameType); @@ -414,7 +421,7 @@ private static bool CanWrap(ISchemaBoundMapper mapper, ColumnType labelNameType) return labelNameType.IsVector && labelNameType.VectorSize == 
scoreType.VectorSize; } - private static ISchemaBoundMapper WrapCore(IHostEnvironment env, ISchemaBoundMapper mapper, RoleMappedSchema trainSchema) + internal static ISchemaBoundMapper WrapCore(IHostEnvironment env, ISchemaBoundMapper mapper, RoleMappedSchema trainSchema) { Contracts.AssertValue(env); env.AssertValue(mapper); diff --git a/src/Microsoft.ML.Data/Transforms/ColumnConcatenatingTransformer.cs b/src/Microsoft.ML.Data/Transforms/ColumnConcatenatingTransformer.cs index 0c245ceefc..df33a03e45 100644 --- a/src/Microsoft.ML.Data/Transforms/ColumnConcatenatingTransformer.cs +++ b/src/Microsoft.ML.Data/Transforms/ColumnConcatenatingTransformer.cs @@ -486,7 +486,7 @@ private BoundColumn MakeColumn(Schema inputSchema, int iinfo) hasSlotNames = true; } - if (!itemType.IsNumber) + if (!(itemType is NumberType)) isNormalized = false; if (totalSize == 0) { @@ -625,7 +625,7 @@ private void GetSlotNames(ref VBuffer> dst) if (inputMetadata != null && inputMetadata.Schema.TryGetColumnIndex(MetadataUtils.Kinds.SlotNames, out int idx)) typeNames = inputMetadata.Schema[idx].Type; - if (typeNames != null && typeNames.VectorSize == typeSrc.VectorSize && typeNames.ItemType.IsText) + if (typeNames != null && typeNames.VectorSize == typeSrc.VectorSize && typeNames.ItemType is TextType) { inputMetadata.GetValue(MetadataUtils.Kinds.SlotNames, ref names); sb.Clear(); @@ -791,7 +791,7 @@ public KeyValuePair SavePfaInfo(BoundPfaContext ctx) var srcName = _columnInfo.Inputs[i].name; if ((srcTokens[i] = ctx.TokenOrNullForName(srcName)) == null) return new KeyValuePair(outName, null); - srcPrimitive[i] = _srcTypes[i].IsPrimitive; + srcPrimitive[i] = _srcTypes[i] is PrimitiveType; } Contracts.Assert(srcTokens.All(tok => tok != null)); var itemColumnType = OutputType.ItemType; diff --git a/src/Microsoft.ML.Data/Transforms/DropSlotsTransform.cs b/src/Microsoft.ML.Data/Transforms/DropSlotsTransform.cs index f14144a45e..60c6f03794 100644 --- a/src/Microsoft.ML.Data/Transforms/DropSlotsTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/DropSlotsTransform.cs @@ -475,7 +475,7 @@ public Mapper(SlotsDroppingTransformer parent, Schema inputSchema) /// a string, a key, a float or a double. /// private static bool IsValidColumnType(ColumnType type) - => (0 < type.KeyCount && type.KeyCount < Utils.ArrayMaxSize) || type == NumberType.R4 || type == NumberType.R8 || type.IsText; + => (0 < type.KeyCount && type.KeyCount < Utils.ArrayMaxSize) || type == NumberType.R4 || type == NumberType.R8 || type is TextType; /// /// Computes the types (column and slotnames), the length reduction, categorical feature indices diff --git a/src/Microsoft.ML.Data/Transforms/ExplainabilityCatalog.cs b/src/Microsoft.ML.Data/Transforms/ExplainabilityCatalog.cs index 9baf2714ae..65a2653e47 100644 --- a/src/Microsoft.ML.Data/Transforms/ExplainabilityCatalog.cs +++ b/src/Microsoft.ML.Data/Transforms/ExplainabilityCatalog.cs @@ -18,17 +18,17 @@ public static class ExplainabilityCatalog /// The model explainability operations catalog. /// Trained model parameters that support Feature Contribution Calculation and which will be used for scoring. /// The name of the feature column that will be used as input. - /// The number of features with highest positive contributions for each data sample that will be retained in the FeatureContribution column. - /// Note that if there are fewer features with positive contributions than , the rest will be returned as zeros. 
- /// The number of features with least negative contributions for each data sample that will be retained in the FeatureContribution column. - /// Note that if there are fewer features with negative contributions than , the rest will be returned as zeros. + /// The number of positive contributions to report, sorted from highest magnitude to lowest magnitude. + /// Note that if there are fewer features with positive contributions than , the rest will be returned as zeros. + /// The number of negative contributions to report, sorted from highest magnitude to lowest magnitude. + /// Note that if there are fewer features with negative contributions than , the rest will be returned as zeros. /// Whether the feature contributions should be normalized to the [-1, 1] interval. public static FeatureContributionCalculatingEstimator FeatureContributionCalculation(this ModelOperationsCatalog.ExplainabilityTransforms catalog, ICalculateFeatureContribution modelParameters, string featureColumn = DefaultColumnNames.Features, - int top = FeatureContributionDefaults.Top, - int bottom = FeatureContributionDefaults.Bottom, + int numPositiveContributions = FeatureContributionDefaults.NumPositiveContributions, + int numNegativeContributions = FeatureContributionDefaults.NumNegativeContributions, bool normalize = FeatureContributionDefaults.Normalize) - => new FeatureContributionCalculatingEstimator(CatalogUtils.GetEnvironment(catalog), modelParameters, featureColumn, top, bottom, normalize); + => new FeatureContributionCalculatingEstimator(CatalogUtils.GetEnvironment(catalog), modelParameters, featureColumn, numPositiveContributions, numNegativeContributions, normalize); } } diff --git a/src/Microsoft.ML.Data/Transforms/FeatureContributionCalculationTransform.cs b/src/Microsoft.ML.Data/Transforms/FeatureContributionCalculationTransform.cs index 7b2bbd87ab..77f841ba1f 100644 --- a/src/Microsoft.ML.Data/Transforms/FeatureContributionCalculationTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/FeatureContributionCalculationTransform.cs @@ -25,10 +25,17 @@ namespace Microsoft.ML.Data { /// - /// The FeatureContributionCalculationTransformer computes model-specific contribution scores for each feature. + /// The FeatureContributionCalculationTransformer computes model-specific per-feature contributions to the score of each example. /// See the list of currently supported models below. /// /// + /// + /// Scoring a dataset with a trained model produces a score, or prediction, for each example. To understand and explain these predictions + /// it can be useful to inspect which features influenced them most significantly. FeatureContributionCalculationTransformer computes a model-specific + /// list of per-feature contributions to the score for each example. These contributions can be positive (they make the score higher) or negative + /// (they make the score lower). + /// + /// /// Feature Contribution Calculation is currently supported for the following models: /// Regression: /// OrdinaryLeastSquares, StochasticDualCoordinateAscent (SDCA), OnlineGradientDescent, PoissonRegression, @@ -39,8 +46,25 @@ namespace Microsoft.ML.Data /// FastForest, FastTree, LightGbm /// Ranking: /// FastTree, LightGbm - /// + /// + /// + /// For linear models, the contribution of a given feature is equal to the product of feature value times the corresponding weight. 
Similarly, + /// for Generalized Additive Models (GAM), the contribution of a feature is equal to the shape function for the given feature evaluated at + /// the feature value. + /// + /// + /// For tree-based models, the calculation of feature contribution essentially consists in determining which splits in the tree have the most impact + /// on the final score and assigning the value of the impact to the features determining the split. More precisely, the contribution of a feature + /// is equal to the change in score produced by exploring the opposite sub-tree every time a decision node for the given feature is encountered. + /// Consider a simple case with a single decision tree that has a decision node for the binary feature F1. Given an example that has feature F1 + /// equal to true, we can calculate the score it would have obtained if we chose the subtree corresponding to the feature F1 being equal to false + /// while keeping the other features constant. The contribution of feature F1 for the given example is the difference between the original score + /// and the score obtained by taking the opposite decision at the node corresponding to feature F1. This algorithm extends naturally to models with + /// many decision trees. + /// + /// /// See the sample below for an example of how to compute feature importance using the FeatureContributionCalculatingTransformer. + /// /// /// /// @@ -60,10 +84,10 @@ public sealed class Arguments : TransformInputBase public string FeatureColumn = DefaultColumnNames.Features; [Argument(ArgumentType.AtMostOnce, HelpText = "Number of top contributions", SortOrder = 3)] - public int Top = FeatureContributionCalculatingEstimator.Defaults.Top; + public int Top = FeatureContributionCalculatingEstimator.Defaults.NumPositiveContributions; [Argument(ArgumentType.AtMostOnce, HelpText = "Number of bottom contributions", SortOrder = 4)] - public int Bottom = FeatureContributionCalculatingEstimator.Defaults.Bottom; + public int Bottom = FeatureContributionCalculatingEstimator.Defaults.NumNegativeContributions; [Argument(ArgumentType.AtMostOnce, HelpText = "Whether or not output of Features contribution should be normalized", ShortName = "norm", SortOrder = 5)] public bool Normalize = FeatureContributionCalculatingEstimator.Defaults.Normalize; @@ -98,23 +122,23 @@ private static VersionInfo GetVersionInfo() /// The environment to use. /// Trained model parameters that support Feature Contribution Calculation and which will be used for scoring. /// The name of the feature column that will be used as input. - /// The number of features with highest positive contributions for each data sample that will be retained in the FeatureContribution column. - /// Note that if there are fewer features with positive contributions than , the rest will be returned as zeros. - /// The number of features with least negative contributions for each data sample that will be retained in the FeatureContribution column. - /// Note that if there are fewer features with negative contributions than , the rest will be returned as zeros. + /// The number of positive contributions to report, sorted from highest magnitude to lowest magnitude. + /// Note that if there are fewer features with positive contributions than , the rest will be returned as zeros. + /// The number of negative contributions to report, sorted from highest magnitude to lowest magnitude. + /// Note that if there are fewer features with negative contributions than , the rest will be returned as zeros. 
/// Whether the feature contributions should be normalized to the [-1, 1] interval. public FeatureContributionCalculatingTransformer(IHostEnvironment env, ICalculateFeatureContribution modelParameters, string featureColumn = DefaultColumnNames.Features, - int top = FeatureContributionCalculatingEstimator.Defaults.Top, - int bottom = FeatureContributionCalculatingEstimator.Defaults.Bottom, + int numPositiveContributions = FeatureContributionCalculatingEstimator.Defaults.NumPositiveContributions, + int numNegativeContributions = FeatureContributionCalculatingEstimator.Defaults.NumNegativeContributions, bool normalize = FeatureContributionCalculatingEstimator.Defaults.Normalize) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(FeatureContributionCalculatingTransformer)), new[] { (input: featureColumn, output: DefaultColumnNames.FeatureContributions) }) { Host.CheckValue(modelParameters, nameof(modelParameters)); Host.CheckNonEmpty(featureColumn, nameof(featureColumn)); - if (top < 0) + if (numPositiveContributions < 0) throw Host.Except($"Number of top contribution must be non negative"); - if (bottom < 0) + if (numNegativeContributions < 0) throw Host.Except($"Number of bottom contribution must be non negative"); // If a predictor implements ICalculateFeatureContribution, it also implements the internal interface IFeatureContributionMapper. @@ -122,8 +146,8 @@ public FeatureContributionCalculatingTransformer(IHostEnvironment env, ICalculat _predictor = modelParameters as IFeatureContributionMapper; Host.AssertValue(_predictor); - Top = top; - Bottom = bottom; + Top = numPositiveContributions; + Bottom = numNegativeContributions; Normalize = normalize; } @@ -258,8 +282,8 @@ public sealed class FeatureContributionCalculatingEstimator : TrivialEstimatorThe environment to use. /// Trained model parameters that support Feature Contribution Calculation and which will be used for scoring. /// The name of the feature column that will be used as input. - /// The number of features with highest positive contributions for each data sample that will be retained in the FeatureContribution column. - /// Note that if there are fewer features with positive contributions than , the rest will be returned as zeros. - /// The number of features with least negative contributions for each data sample that will be retained in the FeatureContribution column. - /// Note that if there are fewer features with negative contributions than , the rest will be returned as zeros. + /// The number of positive contributions to report, sorted from highest magnitude to lowest magnitude. + /// Note that if there are fewer features with positive contributions than , the rest will be returned as zeros. + /// The number of negative contributions to report, sorted from highest magnitude to lowest magnitude. + /// Note that if there are fewer features with negative contributions than , the rest will be returned as zeros. /// Whether the feature contributions should be normalized to the [-1, 1] interval. 
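The renamed `numPositiveContributions`/`numNegativeContributions` parameters are exposed through the explainability catalog extension shown above. A short sketch of how the estimator might be used, assuming an `MLContext` whose `Model.Explainability` catalog carries this extension and model parameters that implement `ICalculateFeatureContribution`; the variable and column names are illustrative, not taken from this change.

```
// A sketch only; assumes the mlContext.Model.Explainability catalog shown above,
// a "Features" column, and model parameters implementing ICalculateFeatureContribution.
using Microsoft.ML;
using Microsoft.ML.Data;

public static class FccSketch
{
    public static IDataView AddContributions(MLContext mlContext, IDataView data, ICalculateFeatureContribution modelParameters)
    {
        var fcc = mlContext.Model.Explainability.FeatureContributionCalculation(
            modelParameters,
            featureColumn: "Features",
            numPositiveContributions: 5,   // keep the 5 largest positive contributions per row
            numNegativeContributions: 5,   // keep the 5 largest negative contributions per row
            normalize: true);              // scale contributions into [-1, 1]

        // Adds a FeatureContributions vector column next to the score. For a linear
        // model, the raw contribution of feature i is featureValue[i] * weight[i],
        // as described in the transformer remarks above.
        return fcc.Fit(data).Transform(data);
    }
}
```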
public FeatureContributionCalculatingEstimator(IHostEnvironment env, ICalculateFeatureContribution modelParameters, string featureColumn = DefaultColumnNames.Features, - int top = Defaults.Top, - int bottom = Defaults.Bottom, + int numPositiveContributions = Defaults.NumPositiveContributions, + int numNegativeContributions = Defaults.NumNegativeContributions, bool normalize = Defaults.Normalize) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(FeatureContributionCalculatingTransformer)), - new FeatureContributionCalculatingTransformer(env, modelParameters, featureColumn, top, bottom, normalize)) + new FeatureContributionCalculatingTransformer(env, modelParameters, featureColumn, numPositiveContributions, numNegativeContributions, normalize)) { _featureColumn = featureColumn; _predictor = modelParameters; diff --git a/src/Microsoft.ML.Data/Transforms/Hashing.cs b/src/Microsoft.ML.Data/Transforms/Hashing.cs index 9a0fbff6b1..30bd08aae2 100644 --- a/src/Microsoft.ML.Data/Transforms/Hashing.cs +++ b/src/Microsoft.ML.Data/Transforms/Hashing.cs @@ -1201,7 +1201,7 @@ internal static class Defaults internal static bool IsColumnTypeValid(ColumnType type) { var itemType = type.ItemType; - return itemType.IsText || itemType.IsKey || itemType.IsNumber || itemType.IsBool; + return itemType is TextType || itemType.IsKey || itemType is NumberType || itemType is BoolType; } internal const string ExpectedColumnType = "Expected Text, Key, numeric or Boolean item type"; diff --git a/src/Microsoft.ML.Data/Transforms/InvertHashUtils.cs b/src/Microsoft.ML.Data/Transforms/InvertHashUtils.cs index 355b3e0333..343e3f9455 100644 --- a/src/Microsoft.ML.Data/Transforms/InvertHashUtils.cs +++ b/src/Microsoft.ML.Data/Transforms/InvertHashUtils.cs @@ -357,7 +357,7 @@ private static void Load(IChannel ch, ModelLoadContext ctx, CodecFactory factory throw ch.ExceptDecode(); ch.AssertValue(codec); ch.CheckDecode(codec.Type.IsVector); - ch.CheckDecode(codec.Type.ItemType.IsText); + ch.CheckDecode(codec.Type.ItemType is TextType); var textCodec = (IValueCodec>>)codec; var bufferLen = ctx.Reader.ReadInt32(); diff --git a/src/Microsoft.ML.Data/Transforms/KeyToValue.cs b/src/Microsoft.ML.Data/Transforms/KeyToValue.cs index 0cf18b2772..bdad8e9bf4 100644 --- a/src/Microsoft.ML.Data/Transforms/KeyToValue.cs +++ b/src/Microsoft.ML.Data/Transforms/KeyToValue.cs @@ -464,7 +464,7 @@ public override JToken SavePfa(BoundPfaContext ctx, JToken srcToken) // probably, which I am not prepared to do. 
var defaultToken = PfaUtils.Type.DefaultTokenOrNull(TypeOutput); JArray jsonValues; - if (TypeOutput.IsText) + if (TypeOutput is TextType) { jsonValues = new JArray(); var keyValues = _values.GetValues(); diff --git a/src/Microsoft.ML.Data/Transforms/KeyToVector.cs b/src/Microsoft.ML.Data/Transforms/KeyToVector.cs index c49848465e..5522167c56 100644 --- a/src/Microsoft.ML.Data/Transforms/KeyToVector.cs +++ b/src/Microsoft.ML.Data/Transforms/KeyToVector.cs @@ -313,7 +313,7 @@ private void AddMetadata(int iinfo, MetadataBuilder builder) int metaKeyValuesCol = 0; if (inputMetadata.Schema.TryGetColumnIndex(MetadataUtils.Kinds.KeyValues, out metaKeyValuesCol)) typeNames = inputMetadata.Schema[metaKeyValuesCol].Type; - if (typeNames == null || !typeNames.IsKnownSizeVector || !typeNames.ItemType.IsText || + if (typeNames == null || !typeNames.IsKnownSizeVector || !(typeNames.ItemType is TextType) || typeNames.VectorSize != _infos[iinfo].TypeSrc.ItemType.KeyCount) { typeNames = null; @@ -377,7 +377,7 @@ private void GetSlotNames(int iinfo, ref VBuffer> dst) var inputMetadata = InputSchema[_infos[iinfo].Source].Metadata; Contracts.AssertValue(inputMetadata); var typeSlotSrc = inputMetadata.Schema.GetColumnOrNull(MetadataUtils.Kinds.SlotNames)?.Type; - if (typeSlotSrc != null && typeSlotSrc.VectorSize == typeSrc.VectorSize && typeSlotSrc.ItemType.IsText) + if (typeSlotSrc != null && typeSlotSrc.VectorSize == typeSrc.VectorSize && typeSlotSrc.ItemType is TextType) { inputMetadata.GetValue(MetadataUtils.Kinds.SlotNames, ref namesSlotSrc); Host.Check(namesSlotSrc.Length == typeSrc.VectorSize); @@ -758,12 +758,12 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) { if (!inputSchema.TryFindColumn(colInfo.Input, out var col)) throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", colInfo.Input); - if ((col.ItemType.ItemType.RawKind == default) || !(col.ItemType.IsVector || col.ItemType.IsPrimitive)) + if ((col.ItemType.ItemType.RawKind == default) || !(col.ItemType.IsVector || col.ItemType is PrimitiveType)) throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", colInfo.Input); var metadata = new List(); if (col.Metadata.TryFindColumn(MetadataUtils.Kinds.KeyValues, out var keyMeta)) - if (col.Kind != SchemaShape.Column.VectorKind.VariableVector && keyMeta.ItemType.IsText) + if (col.Kind != SchemaShape.Column.VectorKind.VariableVector && keyMeta.ItemType is TextType) metadata.Add(new SchemaShape.Column(MetadataUtils.Kinds.SlotNames, SchemaShape.Column.VectorKind.Vector, keyMeta.ItemType, false)); if (!colInfo.Bag && (col.Kind == SchemaShape.Column.VectorKind.Scalar || col.Kind == SchemaShape.Column.VectorKind.Vector)) metadata.Add(new SchemaShape.Column(MetadataUtils.Kinds.CategoricalSlotRanges, SchemaShape.Column.VectorKind.Vector, NumberType.I4, false)); diff --git a/src/Microsoft.ML.Data/Transforms/MetadataDispatcher.cs b/src/Microsoft.ML.Data/Transforms/MetadataDispatcher.cs index 38ee0f82e0..7489b710b3 100644 --- a/src/Microsoft.ML.Data/Transforms/MetadataDispatcher.cs +++ b/src/Microsoft.ML.Data/Transforms/MetadataDispatcher.cs @@ -424,7 +424,7 @@ public void AddPrimitive(string kind, ColumnType type, TValue value) Contracts.CheckNonEmpty(kind, nameof(kind)); Contracts.CheckValue(type, nameof(type)); Contracts.CheckParam(type.RawType == typeof(TValue), nameof(type), "Given type doesn't match type parameter"); - Contracts.CheckParam(type.IsPrimitive, nameof(type), "Must be a primitive type"); + Contracts.CheckParam(type is PrimitiveType, nameof(type), "Must be a 
primitive type"); if (_getters != null && _getters.Any(g => g.Kind == kind)) throw Contracts.Except("Duplicate specification of metadata"); diff --git a/src/Microsoft.ML.Data/Transforms/NormalizeColumn.cs b/src/Microsoft.ML.Data/Transforms/NormalizeColumn.cs index 1d0ee991d1..56e7508aa1 100644 --- a/src/Microsoft.ML.Data/Transforms/NormalizeColumn.cs +++ b/src/Microsoft.ML.Data/Transforms/NormalizeColumn.cs @@ -370,14 +370,14 @@ private AffineColumnFunction(IHost host) public static AffineColumnFunction Create(ModelLoadContext ctx, IHost host, ColumnType typeSrc) { Contracts.CheckValue(host, nameof(host)); - if (typeSrc.IsNumber) + if (typeSrc is NumberType) { if (typeSrc == NumberType.R4) return Sng.ImplOne.Create(ctx, host, typeSrc); if (typeSrc == NumberType.R8) return Dbl.ImplOne.Create(ctx, host, typeSrc); } - else if (typeSrc.ItemType.IsNumber) + else if (typeSrc.ItemType is NumberType) { if (typeSrc.ItemType == NumberType.R4) return Sng.ImplVec.Create(ctx, host, typeSrc); @@ -487,14 +487,14 @@ public bool OnnxInfo(OnnxContext ctx, OnnxNode nodeProtoWrapper, int featureCoun public static CdfColumnFunction Create(ModelLoadContext ctx, IHost host, ColumnType typeSrc) { Contracts.CheckValue(host, nameof(host)); - if (typeSrc.IsNumber) + if (typeSrc is NumberType) { if (typeSrc == NumberType.R4) return Sng.ImplOne.Create(ctx, host, typeSrc); if (typeSrc == NumberType.R8) return Dbl.ImplOne.Create(ctx, host, typeSrc); } - else if (typeSrc.ItemType.IsNumber) + else if (typeSrc.ItemType is NumberType) { if (typeSrc.ItemType == NumberType.R4) return Sng.ImplVec.Create(ctx, host, typeSrc); @@ -621,14 +621,14 @@ public void AttachMetadata(MetadataDispatcher.Builder bldr, ColumnType typeSrc) public static BinColumnFunction Create(ModelLoadContext ctx, IHost host, ColumnType typeSrc) { Contracts.CheckValue(host, nameof(host)); - if (typeSrc.IsNumber) + if (typeSrc is NumberType) { if (typeSrc == NumberType.R4) return Sng.ImplOne.Create(ctx, host, typeSrc); if (typeSrc == NumberType.R8) return Dbl.ImplOne.Create(ctx, host, typeSrc); } - if (typeSrc.IsVector && typeSrc.ItemType.IsNumber) + if (typeSrc.IsVector && typeSrc.ItemType is NumberType) { if (typeSrc.ItemType == NumberType.R4) return Sng.ImplVec.Create(ctx, host, typeSrc); @@ -746,7 +746,7 @@ private ValueGetter GetLabelGetter(Row row, int col, out int labelCardinali { // The label column type is checked as part of args validation. 
var type = row.Schema[col].Type; - Host.Assert(type.IsKey || type.IsNumber); + Host.Assert(type.IsKey || type is NumberType); if (type.IsKey) { @@ -914,14 +914,14 @@ public static IColumnFunctionBuilder CreateBuilder(MinMaxArguments args, IHost h public static IColumnFunctionBuilder CreateBuilder(NormalizingEstimator.MinMaxColumn column, IHost host, int srcIndex, ColumnType srcType, RowCursor cursor) { - if (srcType.IsNumber) + if (srcType is NumberType) { if (srcType == NumberType.R4) return Sng.MinMaxOneColumnFunctionBuilder.Create(column, host, srcType, cursor.GetGetter(srcIndex)); if (srcType == NumberType.R8) return Dbl.MinMaxOneColumnFunctionBuilder.Create(column, host, srcType, cursor.GetGetter(srcIndex)); } - if (srcType.IsKnownSizeVector && srcType.ItemType.IsNumber) + if (srcType.IsKnownSizeVector && srcType.ItemType is NumberType) { if (srcType.ItemType == NumberType.R4) return Sng.MinMaxVecColumnFunctionBuilder.Create(column, host, srcType, cursor.GetGetter>(srcIndex)); @@ -953,14 +953,14 @@ public static IColumnFunctionBuilder CreateBuilder(NormalizingEstimator.MeanVarC { Contracts.AssertValue(host); - if (srcType.IsNumber) + if (srcType is NumberType) { if (srcType == NumberType.R4) return Sng.MeanVarOneColumnFunctionBuilder.Create(column, host, srcType, cursor.GetGetter(srcIndex)); if (srcType == NumberType.R8) return Dbl.MeanVarOneColumnFunctionBuilder.Create(column, host, srcType, cursor.GetGetter(srcIndex)); } - if (srcType.IsKnownSizeVector && srcType.ItemType.IsNumber) + if (srcType.IsKnownSizeVector && srcType.ItemType is NumberType) { if (srcType.ItemType == NumberType.R4) return Sng.MeanVarVecColumnFunctionBuilder.Create(column, host, srcType, cursor.GetGetter>(srcIndex)); @@ -993,14 +993,14 @@ public static IColumnFunctionBuilder CreateBuilder(NormalizingEstimator.LogMeanV Contracts.AssertValue(host); host.AssertValue(column); - if (srcType.IsNumber) + if (srcType is NumberType) { if (srcType == NumberType.R4) return Sng.MeanVarOneColumnFunctionBuilder.Create(column, host, srcType, cursor.GetGetter(srcIndex)); if (srcType == NumberType.R8) return Dbl.MeanVarOneColumnFunctionBuilder.Create(column, host, srcType, cursor.GetGetter(srcIndex)); } - if (srcType.IsKnownSizeVector && srcType.ItemType.IsNumber) + if (srcType.IsKnownSizeVector && srcType.ItemType is NumberType) { if (srcType.ItemType == NumberType.R4) return Sng.MeanVarVecColumnFunctionBuilder.Create(column, host, srcType, cursor.GetGetter>(srcIndex)); @@ -1032,14 +1032,14 @@ public static IColumnFunctionBuilder CreateBuilder(NormalizingEstimator.BinningC { Contracts.AssertValue(host); - if (srcType.IsNumber) + if (srcType is NumberType) { if (srcType == NumberType.R4) return Sng.BinOneColumnFunctionBuilder.Create(column, host, srcType, cursor.GetGetter(srcIndex)); if (srcType == NumberType.R8) return Dbl.BinOneColumnFunctionBuilder.Create(column, host, srcType, cursor.GetGetter(srcIndex)); } - if (srcType.IsKnownSizeVector && srcType.ItemType.IsNumber) + if (srcType.IsKnownSizeVector && srcType.ItemType is NumberType) { if (srcType.ItemType == NumberType.R4) return Sng.BinVecColumnFunctionBuilder.Create(column, host, srcType, cursor.GetGetter>(srcIndex)); @@ -1065,7 +1065,7 @@ public static IColumnFunctionBuilder CreateBuilder(SupervisedBinArguments args, if (labelColumnType.IsKey) host.CheckUserArg(labelColumnType.KeyCount > 0, nameof(args.LabelColumn), "Label column must have a known cardinality"); else - host.CheckUserArg(labelColumnType.IsNumber, nameof(args.LabelColumn), "Label column must be a number 
or a key type"); + host.CheckUserArg(labelColumnType is NumberType, nameof(args.LabelColumn), "Label column must be a number or a key type"); return CreateBuilder( new NormalizingEstimator.SupervisedBinningColumn( @@ -1091,14 +1091,14 @@ private static IColumnFunctionBuilder CreateBuilder(NormalizingEstimator.Supervi { Contracts.AssertValue(host); - if (srcType.IsNumber) + if (srcType is NumberType) { if (srcType == NumberType.R4) return Sng.SupervisedBinOneColumnFunctionBuilder.Create(column, host, srcIndex, labelColumnId, cursor); if (srcType == NumberType.R8) return Dbl.SupervisedBinOneColumnFunctionBuilder.Create(column, host, srcIndex, labelColumnId, cursor); } - if (srcType.IsVector && srcType.ItemType.IsNumber) + if (srcType.IsVector && srcType.ItemType is NumberType) { if (srcType.ItemType == NumberType.R4) return Sng.SupervisedBinVecColumnFunctionBuilder.Create(column, host, srcIndex, labelColumnId, cursor); diff --git a/src/Microsoft.ML.Data/Transforms/RowShufflingTransformer.cs b/src/Microsoft.ML.Data/Transforms/RowShufflingTransformer.cs index 028a94cb87..c5c872e09c 100644 --- a/src/Microsoft.ML.Data/Transforms/RowShufflingTransformer.cs +++ b/src/Microsoft.ML.Data/Transforms/RowShufflingTransformer.cs @@ -382,7 +382,7 @@ public static ShufflePipe Create(int bufferSize, ColumnType type, Delegate gette pipeType = typeof(ImplVec<>).MakeGenericType(type.ItemType.RawType); else { - Contracts.Assert(type.IsPrimitive); + Contracts.Assert(type is PrimitiveType); pipeType = typeof(ImplOne<>).MakeGenericType(type.RawType); } if (_pipeConstructorTypes == null) diff --git a/src/Microsoft.ML.Data/Transforms/TransformBase.cs b/src/Microsoft.ML.Data/Transforms/TransformBase.cs index 141c6aceee..220ef374be 100644 --- a/src/Microsoft.ML.Data/Transforms/TransformBase.cs +++ b/src/Microsoft.ML.Data/Transforms/TransformBase.cs @@ -899,21 +899,21 @@ public override bool IsColumnActive(int col) protected static string TestIsText(ColumnType type) { - if (type.IsText) + if (type is TextType) return null; return "Expected Text type"; } protected static string TestIsTextItem(ColumnType type) { - if (type.ItemType.IsText) + if (type.ItemType is TextType) return null; return "Expected Text type"; } protected static string TestIsTextVector(ColumnType type) { - if (type.ItemType.IsText && type.IsVector) + if (type.ItemType is TextType && type.IsVector) return null; return "Expected vector of Text type"; } diff --git a/src/Microsoft.ML.Data/Transforms/TypeConverting.cs b/src/Microsoft.ML.Data/Transforms/TypeConverting.cs index 59ef43e684..a1a16155c8 100644 --- a/src/Microsoft.ML.Data/Transforms/TypeConverting.cs +++ b/src/Microsoft.ML.Data/Transforms/TypeConverting.cs @@ -364,7 +364,7 @@ internal static bool GetNewType(IExceptionContext ectx, ColumnType srcType, Data if (range != null) { itemType = TypeParsingUtils.ConstructKeyType(kind, range); - if (!srcType.ItemType.IsKey && !srcType.ItemType.IsText) + if (!srcType.ItemType.IsKey && !(srcType.ItemType is TextType)) return false; } else if (!(srcType.ItemType is KeyType key)) @@ -452,9 +452,9 @@ protected override Schema.DetachedColumn[] GetOutputColumnsCore() { builder.Add(InputSchema[ColMapNewToOld[i]].Metadata, name => name == MetadataUtils.Kinds.KeyValues); } - if (srcType.ItemType.IsNumber && _types[i].ItemType.IsNumber) + if (srcType.ItemType is NumberType && _types[i].ItemType is NumberType) builder.Add(InputSchema[ColMapNewToOld[i]].Metadata, name => name == MetadataUtils.Kinds.IsNormalized); - if (srcType.IsBool && _types[i].ItemType.IsNumber) + 
if (srcType is BoolType && _types[i].ItemType is NumberType) { ValueGetter getter = (ref bool dst) => dst = true; builder.Add(MetadataUtils.Kinds.IsNormalized, BoolType.Instance, getter); @@ -558,7 +558,7 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) if (!Data.Conversion.Conversions.Instance.TryGetStandardConversion(col.ItemType, newType, out Delegate del, out bool identity)) throw Host.ExceptParam(nameof(inputSchema), $"Don't know how to convert {colInfo.Input} into {newType.ToString()}"); var metadata = new List(); - if (col.ItemType.IsBool && newType.ItemType.IsNumber) + if (col.ItemType is BoolType && newType.ItemType is NumberType) metadata.Add(new SchemaShape.Column(MetadataUtils.Kinds.IsNormalized, SchemaShape.Column.VectorKind.Scalar, BoolType.Instance, false)); if (col.Metadata.TryFindColumn(MetadataUtils.Kinds.SlotNames, out var slotMeta)) if (col.Kind == SchemaShape.Column.VectorKind.Vector) @@ -567,7 +567,7 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) if (col.ItemType.IsKey) metadata.Add(new SchemaShape.Column(MetadataUtils.Kinds.KeyValues, SchemaShape.Column.VectorKind.Vector, keyMeta.ItemType, false)); if (col.Metadata.TryFindColumn(MetadataUtils.Kinds.IsNormalized, out var normMeta)) - if (col.ItemType.IsNumber && newType.ItemType.IsNumber) + if (col.ItemType is NumberType && newType.ItemType is NumberType) metadata.Add(new SchemaShape.Column(MetadataUtils.Kinds.KeyValues, SchemaShape.Column.VectorKind.Vector, normMeta.ItemType, false)); result[colInfo.Output] = new SchemaShape.Column(colInfo.Output, col.Kind, newType, false, col.Metadata); } diff --git a/src/Microsoft.ML.Data/Transforms/ValueMappingTransformer.cs b/src/Microsoft.ML.Data/Transforms/ValueMappingTransformer.cs index 7ab229169b..ab84f61356 100644 --- a/src/Microsoft.ML.Data/Transforms/ValueMappingTransformer.cs +++ b/src/Microsoft.ML.Data/Transforms/ValueMappingTransformer.cs @@ -548,7 +548,7 @@ private static IDataTransform Create(IHostEnvironment env, Arguments args, IData { Contracts.CheckValue(env, nameof(env)); env.CheckValue(args, nameof(args)); - env.Assert(!string.IsNullOrWhiteSpace(args.DataFile)); + env.CheckUserArg(!string.IsNullOrWhiteSpace(args.DataFile), nameof(args.DataFile)); env.CheckValueOrNull(args.KeyColumn); env.CheckValueOrNull(args.ValueColumn); diff --git a/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingEstimator.cs b/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingEstimator.cs index a66c731820..541b94316a 100644 --- a/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingEstimator.cs +++ b/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingEstimator.cs @@ -60,7 +60,7 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema) if (!inputSchema.TryFindColumn(colInfo.Input, out var col)) throw _host.ExceptSchemaMismatch(nameof(inputSchema), "input", colInfo.Input); - if ((col.ItemType.ItemType.RawKind == default) || !(col.ItemType.IsVector || col.ItemType.IsPrimitive)) + if ((col.ItemType.ItemType.RawKind == default) || !(col.ItemType.IsVector || col.ItemType is PrimitiveType)) throw _host.ExceptSchemaMismatch(nameof(inputSchema), "input", colInfo.Input); SchemaShape metadata; diff --git a/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingTransformer.cs b/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingTransformer.cs index e35571c320..198497596f 100644 --- a/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingTransformer.cs +++ b/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingTransformer.cs @@ -263,7 +263,7 @@ private static 
(string input, string output)[] GetColumnPairs(ColumnInfo[] colum internal string TestIsKnownDataKind(ColumnType type) { - if (type.ItemType.RawKind != default && (type.IsVector || type.IsPrimitive)) + if (type.ItemType.RawKind != default && (type.IsVector || type is PrimitiveType)) return null; return "standard type or a vector of standard type"; } @@ -758,7 +758,7 @@ protected override Delegate MakeGetter(Row input, int iinfo, Func act private bool SaveAsOnnxCore(OnnxContext ctx, int iinfo, ColInfo info, string srcVariableName, string dstVariableName) { - if (!info.TypeSrc.ItemType.IsText) + if (!(info.TypeSrc.ItemType is TextType)) return false; var terms = default(VBuffer>); @@ -834,7 +834,7 @@ private JToken SaveAsPfaCore(BoundPfaContext ctx, int iinfo, ColInfo info, JToke Contracts.AssertValue(srcToken); //Contracts.Assert(CanSavePfa); - if (!info.TypeSrc.ItemType.IsText) + if (!(info.TypeSrc.ItemType is TextType)) return null; var terms = default(VBuffer>); TermMap> map = (TermMap>)_termMap[iinfo].Map; diff --git a/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingTransformerImpl.cs b/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingTransformerImpl.cs index 624e0e1c89..290b140271 100644 --- a/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingTransformerImpl.cs +++ b/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingTransformerImpl.cs @@ -39,7 +39,7 @@ protected Builder(PrimitiveType type) public static Builder Create(ColumnType type, SortOrder sortOrder) { Contracts.AssertValue(type); - Contracts.Assert(type.IsVector || type.IsPrimitive); + Contracts.Assert(type.IsVector || type is PrimitiveType); // Right now we have only two. This "public" interface externally looks like it might // accept any value, but currently the internal implementations of Builder are split // along this being a purely binary option, for now (though this can easily change @@ -49,7 +49,7 @@ public static Builder Create(ColumnType type, SortOrder sortOrder) PrimitiveType itemType = type.ItemType as PrimitiveType; Contracts.AssertValue(itemType); - if (itemType.IsText) + if (itemType is TextType) return new TextImpl(sorted); return Utils.MarshalInvoke(CreateCore, itemType.RawType, itemType, sorted); } @@ -289,7 +289,7 @@ public static Trainer Create(Row row, int col, bool autoConvert, int count, Buil var type = schema[col].Type; Contracts.Assert(autoConvert || bldr.ItemType == type.ItemType); // Auto conversion should only be possible when the type is text. 
- Contracts.Assert(type.IsText || !autoConvert); + Contracts.Assert(type is TextType || !autoConvert); if (type.IsVector) return Utils.MarshalInvoke(CreateVec, bldr.ItemType.RawType, row, col, count, bldr); return Utils.MarshalInvoke(CreateOne, bldr.ItemType.RawType, row, col, autoConvert, count, bldr); @@ -527,7 +527,7 @@ internal static TermMap Load(ModelLoadContext ctx, IHostEnvironment ectx, CodecF IValueCodec codec; if (!codecFactory.TryReadCodec(ctx.Reader.BaseStream, out codec)) throw ectx.Except("Unrecognized codec read"); - ectx.CheckDecode(codec.Type.IsPrimitive); + ectx.CheckDecode(codec.Type is PrimitiveType); int count = ctx.Reader.ReadInt32(); ectx.CheckDecode(count >= 0); return Utils.MarshalInvoke(LoadCodecCore, codec.Type.RawType, ctx, ectx, codec, count); @@ -544,7 +544,7 @@ private static TermMap LoadCodecCore(ModelLoadContext ctx, IExceptionContext ectx.AssertValue(ctx); ectx.AssertValue(codec); ectx.Assert(codec is IValueCodec); - ectx.Assert(codec.Type.IsPrimitive); + ectx.Assert(codec.Type is PrimitiveType); ectx.Assert(count >= 0); IValueCodec codecT = (IValueCodec)codec; @@ -699,7 +699,7 @@ internal override void Save(ModelSaveContext ctx, IHostEnvironment host, CodecFa throw host.Except("We do not know how to serialize terms of type '{0}'", ItemType); ctx.Writer.Write((byte)MapType.Codec); host.Assert(codec.Type.Equals(ItemType)); - host.Assert(codec.Type.IsPrimitive); + host.Assert(codec.Type is PrimitiveType); codecFactory.WriteCodec(ctx.Writer.BaseStream, codec); IValueCodec codecT = (IValueCodec)codec; ctx.Writer.Write(_values.Count); @@ -1041,7 +1041,7 @@ public override void AddMetadata(MetadataBuilder builder) { if (TypedMap.Count == 0) return; - if (IsTextMetadata && !TypedMap.ItemType.IsText) + if (IsTextMetadata && !(TypedMap.ItemType is TextType)) { var conv = Data.Conversion.Conversions.Instance; var stringMapper = conv.GetStringConversion(TypedMap.ItemType); @@ -1133,7 +1133,7 @@ private bool AddMetadataCore(ColumnType srcMetaType, MetadataBuilder buil dst = editor.Commit(); }; - if (IsTextMetadata && !srcMetaType.IsText) + if (IsTextMetadata && !(srcMetaType is TextType)) { var stringMapper = convInst.GetStringConversion(srcMetaType); ValueGetter>> mgetter = diff --git a/src/Microsoft.ML.Data/Utilities/ColumnCursor.cs b/src/Microsoft.ML.Data/Utilities/ColumnCursor.cs index 5928e8ece7..55bb6bcf2e 100644 --- a/src/Microsoft.ML.Data/Utilities/ColumnCursor.cs +++ b/src/Microsoft.ML.Data/Utilities/ColumnCursor.cs @@ -43,7 +43,7 @@ public static IEnumerable GetColumn(this IDataView data, IHostEnvironment // Direct mapping is possible. return GetColumnDirect(data, col); } - else if (typeof(T) == typeof(string) && colType.IsText) + else if (typeof(T) == typeof(string) && colType is TextType) { // Special case of ROM to string conversion. Delegate convert = (Func, string>)((ReadOnlyMemory txt) => txt.ToString()); @@ -64,7 +64,7 @@ public static IEnumerable GetColumn(this IDataView data, IHostEnvironment var meth = del.Method.GetGenericMethodDefinition().MakeGenericMethod(elementType); return (IEnumerable)meth.Invoke(null, new object[] { data, col }); } - else if (elementType == typeof(string) && colType.ItemType.IsText) + else if (elementType == typeof(string) && colType.ItemType is TextType) { // Conversion of DvText items to string items. 
Delegate convert = (Func, string>)((ReadOnlyMemory txt) => txt.ToString()); diff --git a/src/Microsoft.ML.Data/Utilities/ComponentCreation.cs b/src/Microsoft.ML.Data/Utilities/ComponentCreation.cs index 8af4fde39f..9e3962f4b6 100644 --- a/src/Microsoft.ML.Data/Utilities/ComponentCreation.cs +++ b/src/Microsoft.ML.Data/Utilities/ComponentCreation.cs @@ -93,7 +93,7 @@ public static IDataView CreateDataView(this IHostEnvironment env, IList is implemented. + /// results. /// /// The user-defined item type. /// The host environment to use for data view creation. @@ -110,46 +110,6 @@ public static IDataView CreateStreamingDataView(this IHostEnvironment env, return DataViewConstructionUtils.CreateFromEnumerable(env, data, schemaDefinition); } - /// - /// Create a batch prediction engine. - /// - /// The host environment to use. - /// The stream to deserialize the pipeline (transforms and predictor) from. - /// Whether to ignore missing columns in the data view. - /// The optional input schema. If null, the schema is inferred from the type. - /// The optional output schema. If null, the schema is inferred from the type. - internal static BatchPredictionEngine CreateBatchPredictionEngine(this IHostEnvironment env, Stream modelStream, - bool ignoreMissingColumns = false, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null) - where TSrc : class - where TDst : class, new() - { - Contracts.CheckValue(env, nameof(env)); - env.CheckValue(modelStream, nameof(modelStream)); - env.CheckValueOrNull(inputSchemaDefinition); - env.CheckValueOrNull(outputSchemaDefinition); - return new BatchPredictionEngine(env, modelStream, ignoreMissingColumns, inputSchemaDefinition, outputSchemaDefinition); - } - - /// - /// Create a batch prediction engine. - /// - /// The host environment to use. - /// The transformation pipe that may or may not include a scorer. - /// Whether to ignore missing columns in the data view. - /// The optional input schema. If null, the schema is inferred from the type. - /// The optional output schema. If null, the schema is inferred from the type. - internal static BatchPredictionEngine CreateBatchPredictionEngine(this IHostEnvironment env, IDataView dataPipe, - bool ignoreMissingColumns = false, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null) - where TSrc : class - where TDst : class, new() - { - Contracts.CheckValue(env, nameof(env)); - env.CheckValue(dataPipe, nameof(dataPipe)); - env.CheckValueOrNull(inputSchemaDefinition); - env.CheckValueOrNull(outputSchemaDefinition); - return new BatchPredictionEngine(env, dataPipe, ignoreMissingColumns, inputSchemaDefinition, outputSchemaDefinition); - } - /// /// Create an on-demand prediction engine. /// diff --git a/src/Microsoft.ML.Ensemble/EntryPoints/CreateEnsemble.cs b/src/Microsoft.ML.Ensemble/EntryPoints/CreateEnsemble.cs index 2416701f3c..2a616cf2fa 100644 --- a/src/Microsoft.ML.Ensemble/EntryPoints/CreateEnsemble.cs +++ b/src/Microsoft.ML.Ensemble/EntryPoints/CreateEnsemble.cs @@ -321,7 +321,7 @@ private static TOut CreatePipelineEnsemble(IHostEnvironment env, Predictor /// This method is used for comparing pipelines. Its outputs can be passed to /// to check if this pipeline is identical to another pipeline. 
/// - private static void SerializeRoleMappedData(IHostEnvironment env, IChannel ch, RoleMappedData data, + internal static void SerializeRoleMappedData(IHostEnvironment env, IChannel ch, RoleMappedData data, out byte[][] dataSerialized, out string[] dataZipEntryNames) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.Ensemble/Properties/AssemblyInfo.cs b/src/Microsoft.ML.Ensemble/Properties/AssemblyInfo.cs new file mode 100644 index 0000000000..07a909e038 --- /dev/null +++ b/src/Microsoft.ML.Ensemble/Properties/AssemblyInfo.cs @@ -0,0 +1,9 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Runtime.CompilerServices; +using Microsoft.ML; + +[assembly: InternalsVisibleTo("RunTests" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo("Microsoft.ML.Runtime.Scope" + InternalPublicKey.Value)] diff --git a/src/Microsoft.ML.EntryPoints/CrossValidationMacro.cs b/src/Microsoft.ML.EntryPoints/CrossValidationMacro.cs index ce64661b57..a69b301014 100644 --- a/src/Microsoft.ML.EntryPoints/CrossValidationMacro.cs +++ b/src/Microsoft.ML.EntryPoints/CrossValidationMacro.cs @@ -14,9 +14,6 @@ [assembly: LoadableClass(typeof(void), typeof(CrossValidationMacro), null, typeof(SignatureEntryPointModule), "CrossValidationMacro")] -// The warning #612 is disabled because the following code uses a lot of things in Legacy.Models and Legacy.Transforms while Legacy is marked as obsolete. -// Because that dependency will be removed form ML.NET, one needs to rewrite all places where legacy APIs are used. -#pragma warning disable 612 namespace Microsoft.ML.EntryPoints { diff --git a/src/Microsoft.ML.EntryPoints/FeatureCombiner.cs b/src/Microsoft.ML.EntryPoints/FeatureCombiner.cs index cb04432ec1..ed3a554a64 100644 --- a/src/Microsoft.ML.EntryPoints/FeatureCombiner.cs +++ b/src/Microsoft.ML.EntryPoints/FeatureCombiner.cs @@ -120,7 +120,7 @@ private static string GetTerms(IDataView data, string colName) if (!schema.TryGetColumnIndex(colName, out col)) return null; var type = schema[col].Metadata.Schema.GetColumnOrNull(MetadataUtils.Kinds.KeyValues)?.Type; - if (type == null || !type.IsKnownSizeVector || !type.ItemType.IsText) + if (type == null || !type.IsKnownSizeVector || !(type.ItemType is TextType)) return null; var metadata = default(VBuffer>); schema[col].Metadata.GetValue(MetadataUtils.Kinds.KeyValues, ref metadata); @@ -177,7 +177,7 @@ private static IDataView ApplyConvert(List continue; } } - if (type.IsNumber || type.IsBool) + if (type is NumberType || type is BoolType) { // Even if the column is R4 in training, we still want to add it to the conversion. // The reason is that at scoring time, the column might have a slightly different type (R8 for example). 
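Most of the churn in the surrounding hunks follows one mechanical pattern: the `ColumnType` convenience properties (`IsBool`, `IsNumber`, `IsText`, `IsPrimitive`) are replaced by direct C# type tests against the type hierarchy. A self-contained sketch of the idea, using a hypothetical mini hierarchy rather than the real Microsoft.ML.Data classes:

```csharp
// Hypothetical mini hierarchy; the real ColumnType/PrimitiveType/TextType/NumberType/
// BoolType classes live in Microsoft.ML.Data and carry much more state.
public abstract class ColumnType { }
public abstract class PrimitiveType : ColumnType { }
public sealed class TextType : PrimitiveType { }
public sealed class NumberType : PrimitiveType { }
public sealed class BoolType : PrimitiveType { }

public static class TypeChecks
{
    // Old style: a boolean property per kind (type.IsText, type.IsNumber, ...) that every
    // subclass had to keep in sync. New style used throughout this change: test the
    // hierarchy directly with the C# "is" operator.
    public static bool IsText(ColumnType type) => type is TextType;

    public static bool IsNumberOrBool(ColumnType type) => type is NumberType || type is BoolType;

    public static bool IsPrimitive(ColumnType type) => type is PrimitiveType;
}
```

The negated forms in the diff, such as `!(info.TypeSrc.ItemType is TextType)`, are the same test wrapped in parentheses, since the `is not` pattern did not exist in the C# version in use at the time.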
@@ -235,7 +235,7 @@ public static CommonOutputs.TransformOutput PrepareClassificationLabel(IHostEnvi if (!input.Data.Schema.TryGetColumnIndex(input.LabelColumn, out labelCol)) throw host.Except($"Column '{input.LabelColumn}' not found."); var labelType = input.Data.Schema[labelCol].Type; - if (labelType.IsKey || labelType.IsBool) + if (labelType.IsKey || labelType is BoolType) { var nop = NopTransform.CreateIfNeeded(env, input.Data); return new CommonOutputs.TransformOutput { Model = new TransformModelImpl(env, nop, input.Data), OutputData = nop }; @@ -270,7 +270,7 @@ public static CommonOutputs.TransformOutput ConvertPredictedLabel(IHostEnvironme if (!input.Data.Schema.TryGetColumnIndex(input.PredictedLabelColumn, out predictedLabelCol)) throw host.Except($"Column '{input.PredictedLabelColumn}' not found."); var predictedLabelType = input.Data.Schema[predictedLabelCol].Type; - if (predictedLabelType.IsNumber || predictedLabelType.IsBool) + if (predictedLabelType is NumberType || predictedLabelType is BoolType) { var nop = NopTransform.CreateIfNeeded(env, input.Data); return new CommonOutputs.TransformOutput { Model = new TransformModelImpl(env, nop, input.Data), OutputData = nop }; @@ -292,7 +292,7 @@ public static CommonOutputs.TransformOutput PrepareRegressionLabel(IHostEnvironm if (!input.Data.Schema.TryGetColumnIndex(input.LabelColumn, out labelCol)) throw host.Except($"Column '{input.LabelColumn}' not found."); var labelType = input.Data.Schema[labelCol].Type; - if (labelType == NumberType.R4 || !labelType.IsNumber) + if (labelType == NumberType.R4 || !(labelType is NumberType)) { var nop = NopTransform.CreateIfNeeded(env, input.Data); return new CommonOutputs.TransformOutput { Model = new TransformModelImpl(env, nop, input.Data), OutputData = nop }; diff --git a/src/Microsoft.ML.EntryPoints/ImportTextData.cs b/src/Microsoft.ML.EntryPoints/ImportTextData.cs index 69e7c6c073..0e550446e3 100644 --- a/src/Microsoft.ML.EntryPoints/ImportTextData.cs +++ b/src/Microsoft.ML.EntryPoints/ImportTextData.cs @@ -9,9 +9,6 @@ [assembly: LoadableClass(typeof(void), typeof(ImportTextData), null, typeof(SignatureEntryPointModule), "ImportTextData")] -// The warning #612 is disabled because the following code uses legacy TextLoader. -// Because that dependency will be removed form ML.NET, one needs to rewrite all places where legacy APIs are used. 
-#pragma warning disable 612 namespace Microsoft.ML.EntryPoints { /// @@ -44,6 +41,25 @@ public static Output ImportText(IHostEnvironment env, Input input) var loader = host.CreateLoader(string.Format("Text{{{0}}}", input.CustomSchema), new FileHandleSource(input.InputFile)); return new Output { Data = loader }; } + + public sealed class LoaderInput + { + [Argument(ArgumentType.Required, ShortName = "data", HelpText = "Location of the input file", SortOrder = 1)] + public IFileHandle InputFile; + + [Argument(ArgumentType.Required, ShortName = "args", HelpText = "Arguments", SortOrder = 2)] + public TextLoader.Arguments Arguments = new TextLoader.Arguments(); + } + + [TlcModule.EntryPoint(Name = "Data.TextLoader", Desc = "Import a dataset from a text file")] + public static Output TextLoader(IHostEnvironment env, LoaderInput input) + { + Contracts.CheckValue(env, nameof(env)); + var host = env.Register("ImportTextData"); + env.CheckValue(input, nameof(input)); + EntryPointUtils.CheckInputArgs(host, input); + var loader = host.CreateLoader(input.Arguments, new FileHandleSource(input.InputFile)); + return new Output { Data = loader }; + } } } -#pragma warning restore 612 diff --git a/src/Microsoft.ML.EntryPoints/MacroUtils.cs b/src/Microsoft.ML.EntryPoints/MacroUtils.cs index 466bd96d41..a6e1dbfd30 100644 --- a/src/Microsoft.ML.EntryPoints/MacroUtils.cs +++ b/src/Microsoft.ML.EntryPoints/MacroUtils.cs @@ -9,9 +9,6 @@ [assembly: EntryPointModule(typeof(MacroUtils))] -// The warning #612 is disabled because the following code uses a lot of things in Legacy.Models while Legacy.Model is marked as obsolete. -// Because that dependency will be removed form ML.NET, one needs to rewrite all places where legacy APIs are used. -#pragma warning disable 612 namespace Microsoft.ML.EntryPoints { public static class MacroUtils diff --git a/src/Microsoft.ML.EntryPoints/OneVersusAllMacro.cs b/src/Microsoft.ML.EntryPoints/OneVersusAllMacro.cs index 857ec4afe7..6b117c181c 100644 --- a/src/Microsoft.ML.EntryPoints/OneVersusAllMacro.cs +++ b/src/Microsoft.ML.EntryPoints/OneVersusAllMacro.cs @@ -14,9 +14,6 @@ [assembly: LoadableClass(typeof(void), typeof(OneVersusAllMacro), null, typeof(SignatureEntryPointModule), "OneVersusAllMacro")] -// The warning #612 is disabled because the following code uses Legacy.Models and Legacy.Transforms while Legacy is marked as obsolete. -// Because that dependency will be removed form ML.NET, one needs to rewrite all places where legacy APIs are used. 
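The ImportTextData.cs hunk above adds a second entry point, `Data.TextLoader`, whose `LoaderInput` carries a full `TextLoader.Arguments` instead of the free-form `CustomSchema` string used by `Data.CustomTextLoader`. A hedged sketch of invoking it directly; it assumes an `IHostEnvironment` named `env` and an `IFileHandle` named `inputFile` are already in scope, and the column layout below is purely illustrative:

```csharp
// Illustrative call to the new entry point; column names, kinds and indices are made up.
var input = new ImportTextData.LoaderInput
{
    InputFile = inputFile,
    Arguments = new TextLoader.Arguments
    {
        HasHeader = true,
        Column = new[]
        {
            new TextLoader.Column("Label", DataKind.R4, 0),
            new TextLoader.Column("Features", DataKind.R4, 1, 9),
        }
    }
};
var output = ImportTextData.TextLoader(env, input);
IDataView data = output.Data;
```

In normal use this entry point would be reached through the entry-point graph by its registered name `Data.TextLoader`; the direct call above is only meant to show the shape of its input and output.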
-#pragma warning disable 612 namespace Microsoft.ML.EntryPoints { /// diff --git a/src/Microsoft.ML.EntryPoints/Properties/AssemblyInfo.cs b/src/Microsoft.ML.EntryPoints/Properties/AssemblyInfo.cs index 0169e5d896..4c95a14f67 100644 --- a/src/Microsoft.ML.EntryPoints/Properties/AssemblyInfo.cs +++ b/src/Microsoft.ML.EntryPoints/Properties/AssemblyInfo.cs @@ -6,4 +6,5 @@ using Microsoft.ML; [assembly: InternalsVisibleTo("Microsoft.ML.Tests" + PublicKey.TestValue)] -[assembly: InternalsVisibleTo("Microsoft.ML.Core.Tests" + PublicKey.TestValue)] \ No newline at end of file +[assembly: InternalsVisibleTo("Microsoft.ML.Core.Tests" + PublicKey.TestValue)] +[assembly: InternalsVisibleTo("RunTests" + InternalPublicKey.Value)] diff --git a/src/Microsoft.ML.EntryPoints/TrainTestMacro.cs b/src/Microsoft.ML.EntryPoints/TrainTestMacro.cs index 260ba7b756..8c53caf7be 100644 --- a/src/Microsoft.ML.EntryPoints/TrainTestMacro.cs +++ b/src/Microsoft.ML.EntryPoints/TrainTestMacro.cs @@ -12,9 +12,6 @@ [assembly: LoadableClass(typeof(void), typeof(TrainTestMacro), null, typeof(SignatureEntryPointModule), "TrainTestMacro")] -// The warning #612 is disabled because the following code uses a lot of things in Legacy.Models and Legacy.Transforms while Legacy is marked as obsolete. -// Because that dependency will be removed form ML.NET, one needs to rewrite all places where legacy APIs are used. -#pragma warning disable 612 namespace Microsoft.ML.EntryPoints { public static class TrainTestMacro diff --git a/src/Microsoft.ML.FastTree/FastTree.cs b/src/Microsoft.ML.FastTree/FastTree.cs index 9c5e7ec4cd..50fb7b9701 100644 --- a/src/Microsoft.ML.FastTree/FastTree.cs +++ b/src/Microsoft.ML.FastTree/FastTree.cs @@ -2837,6 +2837,18 @@ public abstract class TreeEnsembleModelParameters : bool ICanSavePfa.CanSavePfa => true; bool ICanSaveOnnx.CanSaveOnnx(OnnxContext ctx) => true; + + /// + /// Used to determine the contribution of each feature to the score of an example by . + /// The calculation of feature contribution essentially consists in determining which splits in the tree have the most impact + /// on the final score and assigning the value of the impact to the features determining the split. More precisely, the contribution of a feature + /// is equal to the change in score produced by exploring the opposite sub-tree every time a decision node for the given feature is encountered. + /// Consider a simple case with a single decision tree that has a decision node for the binary feature F1. Given an example that has feature F1 + /// equal to true, we can calculate the score it would have obtained if we chose the subtree corresponding to the feature F1 being equal to false + /// while keeping the other features constant. The contribution of feature F1 for the given example is the difference between the original score + /// and the score obtained by taking the opposite decision at the node corresponding to feature F1. This algorithm extends naturally to models with + /// many decision trees. 
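The remarks above describe the tree-ensemble contribution rule: for each feature, take the opposite branch at every node that splits on it and report the difference between the original score and the flipped score. A self-contained toy sketch of that rule for a single depth-1 tree; the types are illustrative only, and the real `TreeEnsembleModelParameters` implementation operates on the trained ensemble rather than anything this simple:

```csharp
// Toy single-node decision tree over boolean features (illustrative only).
public sealed class Stump
{
    public int FeatureIndex;   // the feature tested at the single decision node
    public double WhenFalse;   // leaf value when the feature is false
    public double WhenTrue;    // leaf value when the feature is true

    public double Score(bool[] features) =>
        features[FeatureIndex] ? WhenTrue : WhenFalse;

    // Contribution of feature f = original score minus the score obtained by taking
    // the opposite branch at every node splitting on f (here there is at most one).
    public double Contribution(bool[] features, int f)
    {
        if (f != FeatureIndex)
            return 0.0;                      // the tree never consults f
        var flipped = (bool[])features.Clone();
        flipped[f] = !flipped[f];
        return Score(features) - Score(flipped);
    }
}
```

For an ensemble of many trees the remarks only say the algorithm "extends naturally", presumably by accumulating the per-tree contributions; the exact aggregation is not spelled out here. The GamTrainer hunk further down states the GAM case explicitly: a feature's contribution is its shape function evaluated at the feature's value.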
+ /// public FeatureContributionCalculator FeatureContributionClaculator => new FeatureContributionCalculator(this); public TreeEnsembleModelParameters(IHostEnvironment env, string name, TreeEnsemble trainedEnsemble, int numFeatures, string innerArgs) diff --git a/src/Microsoft.ML.FastTree/FastTreeArguments.cs b/src/Microsoft.ML.FastTree/FastTreeArguments.cs index e14fd33db0..9485396078 100644 --- a/src/Microsoft.ML.FastTree/FastTreeArguments.cs +++ b/src/Microsoft.ML.FastTree/FastTreeArguments.cs @@ -138,12 +138,13 @@ public enum Bundle : Byte Adjacent = 2 } + [BestFriend] internal static class Defaults { - internal const int NumTrees = 100; - internal const int NumLeaves = 20; - internal const int MinDocumentsInLeaves = 10; - internal const double LearningRates = 0.2; + public const int NumTrees = 100; + public const int NumLeaves = 20; + public const int MinDocumentsInLeaves = 10; + public const double LearningRates = 0.2; } public abstract class TreeArgs : LearnerInputBaseWithGroupId diff --git a/src/Microsoft.ML.FastTree/GamTrainer.cs b/src/Microsoft.ML.FastTree/GamTrainer.cs index 3c6edf0acc..f779580a8d 100644 --- a/src/Microsoft.ML.FastTree/GamTrainer.cs +++ b/src/Microsoft.ML.FastTree/GamTrainer.cs @@ -668,6 +668,11 @@ public abstract class GamModelParametersBase : ModelParametersBase, IValu ColumnType IValueMapper.InputType => _inputType; ColumnType IValueMapper.OutputType => _outputType; + /// + /// Used to determine the contribution of each feature to the score of an example by . + /// For Generalized Additive Models (GAM), the contribution of a feature is equal to the shape function for the given feature evaluated at + /// the feature value. + /// public FeatureContributionCalculator FeatureContributionClaculator => new FeatureContributionCalculator(this); private protected GamModelParametersBase(IHostEnvironment env, string name, diff --git a/src/Microsoft.ML.FastTree/Properties/AssemblyInfo.cs b/src/Microsoft.ML.FastTree/Properties/AssemblyInfo.cs index cf6d8d8d42..b5261449b2 100644 --- a/src/Microsoft.ML.FastTree/Properties/AssemblyInfo.cs +++ b/src/Microsoft.ML.FastTree/Properties/AssemblyInfo.cs @@ -10,4 +10,9 @@ [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.LightGBM" + PublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Sweeper" + PublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.StaticPipe" + PublicKey.Value)] + +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Internal.FastTree" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "RunTests" + InternalPublicKey.Value)] + [assembly: WantsToBeBestFriends] diff --git a/src/Microsoft.ML.FastTree/Utils/ToByteArrayExtensions.cs b/src/Microsoft.ML.FastTree/Utils/ToByteArrayExtensions.cs index 53d475df1e..108aeeac87 100644 --- a/src/Microsoft.ML.FastTree/Utils/ToByteArrayExtensions.cs +++ b/src/Microsoft.ML.FastTree/Utils/ToByteArrayExtensions.cs @@ -566,7 +566,7 @@ public static void ToByteArray(this MD5Hash[] a, byte[] buffer, ref int position } } - public static unsafe MD5Hash[] ToUInt128Array(this byte[] buffer, ref int position) + public static unsafe MD5Hash[] ToRowIdArray(this byte[] buffer, ref int position) { int length = buffer.ToInt(ref position); MD5Hash[] a = new MD5Hash[length]; diff --git a/src/Microsoft.ML.HalLearners.StaticPipe/Microsoft.ML.HalLearners.StaticPipe.csproj b/src/Microsoft.ML.HalLearners.StaticPipe/Microsoft.ML.HalLearners.StaticPipe.csproj index aabe2dec4c..715cc7df2b 100644 --- 
a/src/Microsoft.ML.HalLearners.StaticPipe/Microsoft.ML.HalLearners.StaticPipe.csproj +++ b/src/Microsoft.ML.HalLearners.StaticPipe/Microsoft.ML.HalLearners.StaticPipe.csproj @@ -7,6 +7,7 @@ + diff --git a/src/Microsoft.ML.HalLearners/Properties/AssemblyInfo.cs b/src/Microsoft.ML.HalLearners/Properties/AssemblyInfo.cs index 694c53c65f..378fcf459a 100644 --- a/src/Microsoft.ML.HalLearners/Properties/AssemblyInfo.cs +++ b/src/Microsoft.ML.HalLearners/Properties/AssemblyInfo.cs @@ -7,4 +7,6 @@ [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.HalLearners.StaticPipe" + PublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "RunTests" + InternalPublicKey.Value)] + [assembly: WantsToBeBestFriends] diff --git a/src/Microsoft.ML.ImageAnalytics/ImageGrayscaleTransform.cs b/src/Microsoft.ML.ImageAnalytics/ImageGrayscaleTransform.cs index 5cb7642893..b98a9bcd9d 100644 --- a/src/Microsoft.ML.ImageAnalytics/ImageGrayscaleTransform.cs +++ b/src/Microsoft.ML.ImageAnalytics/ImageGrayscaleTransform.cs @@ -16,8 +16,6 @@ using Microsoft.ML.ImageAnalytics; using Microsoft.ML.Internal.Utilities; using Microsoft.ML.Model; -using Microsoft.ML.StaticPipe; -using Microsoft.ML.StaticPipe.Runtime; [assembly: LoadableClass(ImageGrayscaleTransform.Summary, typeof(IDataTransform), typeof(ImageGrayscaleTransform), typeof(ImageGrayscaleTransform.Arguments), typeof(SignatureDataTransform), ImageGrayscaleTransform.UserName, "ImageGrayscaleTransform", "ImageGrayscale")] @@ -239,51 +237,5 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) return new SchemaShape(result.Values); } - - private interface IColInput - { - PipelineColumn Input { get; } - } - - internal sealed class OutPipelineColumn : Custom, IColInput - { - public PipelineColumn Input { get; } - - public OutPipelineColumn(Custom input) - : base(Reconciler.Inst, input) - { - Contracts.AssertValue(input); - Contracts.Assert(typeof(T) == typeof(Bitmap) || typeof(T) == typeof(UnknownSizeBitmap)); - Input = input; - } - } - - /// - /// Reconciler to an for the . 
- /// - /// Because we want to use the same reconciler for - /// - /// - private sealed class Reconciler : EstimatorReconciler - { - public static Reconciler Inst = new Reconciler(); - - private Reconciler() { } - - public override IEstimator Reconcile(IHostEnvironment env, - PipelineColumn[] toOutput, - IReadOnlyDictionary inputNames, - IReadOnlyDictionary outputNames, - IReadOnlyCollection usedNames) - { - var cols = new (string input, string output)[toOutput.Length]; - for (int i = 0; i < toOutput.Length; ++i) - { - var outCol = (IColInput)toOutput[i]; - cols[i] = (inputNames[outCol.Input], outputNames[toOutput[i]]); - } - return new ImageGrayscalingEstimator(env, cols); - } - } } } diff --git a/src/Microsoft.ML.ImageAnalytics/ImageLoaderTransform.cs b/src/Microsoft.ML.ImageAnalytics/ImageLoaderTransform.cs index a79085b606..d303cef3e4 100644 --- a/src/Microsoft.ML.ImageAnalytics/ImageLoaderTransform.cs +++ b/src/Microsoft.ML.ImageAnalytics/ImageLoaderTransform.cs @@ -16,8 +16,6 @@ using Microsoft.ML.ImageAnalytics; using Microsoft.ML.Internal.Utilities; using Microsoft.ML.Model; -using Microsoft.ML.StaticPipe; -using Microsoft.ML.StaticPipe.Runtime; [assembly: LoadableClass(ImageLoaderTransform.Summary, typeof(IDataTransform), typeof(ImageLoaderTransform), typeof(ImageLoaderTransform.Arguments), typeof(SignatureDataTransform), ImageLoaderTransform.UserName, "ImageLoaderTransform", "ImageLoader")] @@ -115,7 +113,7 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Sch protected override void CheckInputColumn(Schema inputSchema, int col, int srcCol) { - if (!inputSchema[srcCol].Type.IsText) + if (!(inputSchema[srcCol].Type is TextType)) throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", ColumnPairs[col].input, TextType.Instance.ToString(), inputSchema[srcCol].Type.ToString()); } @@ -234,7 +232,7 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) { if (!inputSchema.TryFindColumn(input, out var col)) throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", input); - if (!col.ItemType.IsText || col.Kind != SchemaShape.Column.VectorKind.Scalar) + if (!(col.ItemType is TextType) || col.Kind != SchemaShape.Column.VectorKind.Scalar) throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", input, TextType.Instance.ToString(), col.GetTypeString()); result[output] = new SchemaShape.Column(output, SchemaShape.Column.VectorKind.Scalar, _imageType, false); @@ -242,62 +240,5 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) return new SchemaShape(result.Values); } - - internal sealed class OutPipelineColumn : Custom - { - private readonly Scalar _input; - - public OutPipelineColumn(Scalar path, string relativeTo) - : base(new Reconciler(relativeTo), path) - { - Contracts.AssertValue(path); - _input = path; - } - - /// - /// Reconciler to an for the . - /// - /// - /// We must create a new reconciler per call, because the relative path of - /// is considered a transform-wide option, as it is not specified in . However, we still - /// implement so the analyzer can still equate two of these things if they happen to share the same - /// path, so we can be a bit more efficient with respect to our estimator declarations. 
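The comment being deleted just above explains why the static-pipe ImageLoader reconciler implemented `IEquatable`: the relative path is a transform-wide option, so two declarations that happen to share the same path should be recognized as the same estimator and merged. A minimal sketch of that equality idea, with a toy type rather than the removed `EstimatorReconciler` API:

```csharp
using System;

// Toy reconciler keyed on a transform-wide option; two instances with the same relative
// path compare equal, so an analyzer could merge the estimators they would produce.
public sealed class PathReconciler : IEquatable<PathReconciler>
{
    private readonly string _relativeTo;

    public PathReconciler(string relativeTo) => _relativeTo = relativeTo;

    public bool Equals(PathReconciler other) => other != null && other._relativeTo == _relativeTo;
    public override bool Equals(object obj) => obj is PathReconciler other && Equals(other);
    public override int GetHashCode() => _relativeTo?.GetHashCode() ?? 0;
}
```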
- /// - /// - private sealed class Reconciler : EstimatorReconciler, IEquatable - { - private readonly string _relTo; - - public Reconciler(string relativeTo) - { - Contracts.AssertValueOrNull(relativeTo); - _relTo = relativeTo; - } - - public bool Equals(Reconciler other) - => other != null && other._relTo == _relTo; - - public override bool Equals(object obj) - => obj is Reconciler other && Equals(other); - - public override int GetHashCode() - => _relTo?.GetHashCode() ?? 0; - - public override IEstimator Reconcile(IHostEnvironment env, - PipelineColumn[] toOutput, - IReadOnlyDictionary inputNames, - IReadOnlyDictionary outputNames, - IReadOnlyCollection usedNames) - { - var cols = new (string input, string output)[toOutput.Length]; - for (int i = 0; i < toOutput.Length; ++i) - { - var outCol = (OutPipelineColumn)toOutput[i]; - cols[i] = (inputNames[outCol._input], outputNames[outCol]); - } - return new ImageLoadingEstimator(env, _relTo, cols); - } - } - } } } diff --git a/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractorTransform.cs b/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractorTransform.cs index dd76d71da7..0f5b218d1a 100644 --- a/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractorTransform.cs +++ b/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractorTransform.cs @@ -16,8 +16,6 @@ using Microsoft.ML.ImageAnalytics; using Microsoft.ML.Internal.Utilities; using Microsoft.ML.Model; -using Microsoft.ML.StaticPipe; -using Microsoft.ML.StaticPipe.Runtime; [assembly: LoadableClass(ImagePixelExtractorTransform.Summary, typeof(IDataTransform), typeof(ImagePixelExtractorTransform), typeof(ImagePixelExtractorTransform.Arguments), typeof(SignatureDataTransform), ImagePixelExtractorTransform.UserName, "ImagePixelExtractorTransform", "ImagePixelExtractor")] @@ -648,70 +646,5 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) return new SchemaShape(result.Values); } - - private interface IColInput - { - Custom Input { get; } - - ImagePixelExtractorTransform.ColumnInfo MakeColumnInfo(string input, string output); - } - - internal sealed class OutPipelineColumn : Vector, IColInput - { - public Custom Input { get; } - private static readonly ImagePixelExtractorTransform.Arguments _defaultArgs = new ImagePixelExtractorTransform.Arguments(); - private readonly ImagePixelExtractorTransform.Column _colParam; - - public OutPipelineColumn(Custom input, ImagePixelExtractorTransform.Column col) - : base(Reconciler.Inst, input) - { - Contracts.AssertValue(input); - Contracts.Assert(typeof(T) == typeof(float) || typeof(T) == typeof(byte)); - Input = input; - _colParam = col; - } - - public ImagePixelExtractorTransform.ColumnInfo MakeColumnInfo(string input, string output) - { - // In principle, the analyzer should only call the the reconciler once for these columns. - Contracts.Assert(_colParam.Source == null); - Contracts.Assert(_colParam.Name == null); - - _colParam.Name = output; - _colParam.Source = input; - return new ImagePixelExtractorTransform.ColumnInfo(_colParam, _defaultArgs); - } - } - - /// - /// Reconciler to an for the . - /// - /// Because we want to use the same reconciler for - /// - /// - private sealed class Reconciler : EstimatorReconciler - { - /// - /// Because there are no global settings that cannot be overridden, we can always just use the same reconciler. 
- /// - public static Reconciler Inst = new Reconciler(); - - private Reconciler() { } - - public override IEstimator Reconcile(IHostEnvironment env, - PipelineColumn[] toOutput, - IReadOnlyDictionary inputNames, - IReadOnlyDictionary outputNames, - IReadOnlyCollection usedNames) - { - var cols = new ImagePixelExtractorTransform.ColumnInfo[toOutput.Length]; - for (int i = 0; i < toOutput.Length; ++i) - { - var outCol = (IColInput)toOutput[i]; - cols[i] = outCol.MakeColumnInfo(inputNames[outCol.Input], outputNames[toOutput[i]]); - } - return new ImagePixelExtractingEstimator(env, cols); - } - } } } diff --git a/src/Microsoft.ML.ImageAnalytics/ImageResizerTransform.cs b/src/Microsoft.ML.ImageAnalytics/ImageResizerTransform.cs index 3d795846ba..dab06be37a 100644 --- a/src/Microsoft.ML.ImageAnalytics/ImageResizerTransform.cs +++ b/src/Microsoft.ML.ImageAnalytics/ImageResizerTransform.cs @@ -16,8 +16,6 @@ using Microsoft.ML.Internal.Internallearn; using Microsoft.ML.Internal.Utilities; using Microsoft.ML.Model; -using Microsoft.ML.StaticPipe; -using Microsoft.ML.StaticPipe.Runtime; [assembly: LoadableClass(ImageResizerTransform.Summary, typeof(IDataTransform), typeof(ImageResizerTransform), typeof(ImageResizerTransform.Arguments), typeof(SignatureDataTransform), ImageResizerTransform.UserName, "ImageResizerTransform", "ImageResizer")] @@ -459,58 +457,5 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) return new SchemaShape(result.Values); } - - internal sealed class OutPipelineColumn : Custom - { - private readonly PipelineColumn _input; - private readonly int _width; - private readonly int _height; - private readonly ImageResizerTransform.ResizingKind _resizing; - private readonly ImageResizerTransform.Anchor _cropAnchor; - - public OutPipelineColumn(PipelineColumn input, int width, int height, - ImageResizerTransform.ResizingKind resizing, ImageResizerTransform.Anchor cropAnchor) - : base(Reconciler.Inst, input) - { - Contracts.AssertValue(input); - _input = input; - _width = width; - _height = height; - _resizing = resizing; - _cropAnchor = cropAnchor; - } - - private ImageResizerTransform.ColumnInfo MakeColumnInfo(string input, string output) - => new ImageResizerTransform.ColumnInfo(input, output, _width, _height, _resizing, _cropAnchor); - - /// - /// Reconciler to an for the . 
- /// - /// - /// - private sealed class Reconciler : EstimatorReconciler - { - public static Reconciler Inst = new Reconciler(); - - private Reconciler() - { - } - - public override IEstimator Reconcile(IHostEnvironment env, - PipelineColumn[] toOutput, - IReadOnlyDictionary inputNames, - IReadOnlyDictionary outputNames, - IReadOnlyCollection usedNames) - { - var cols = new ImageResizerTransform.ColumnInfo[toOutput.Length]; - for (int i = 0; i < toOutput.Length; ++i) - { - var outCol = (OutPipelineColumn)toOutput[i]; - cols[i] = outCol.MakeColumnInfo(inputNames[outCol._input], outputNames[outCol]); - } - return new ImageResizingEstimator(env, cols); - } - } - } } } diff --git a/src/Microsoft.ML.Legacy/Properties/AssemblyInfo.cs b/src/Microsoft.ML.ImageAnalytics/Properties/AssemblyInfo.cs similarity index 63% rename from src/Microsoft.ML.Legacy/Properties/AssemblyInfo.cs rename to src/Microsoft.ML.ImageAnalytics/Properties/AssemblyInfo.cs index 297305755a..443f2304db 100644 --- a/src/Microsoft.ML.Legacy/Properties/AssemblyInfo.cs +++ b/src/Microsoft.ML.ImageAnalytics/Properties/AssemblyInfo.cs @@ -5,5 +5,6 @@ using System.Runtime.CompilerServices; using Microsoft.ML; -[assembly: InternalsVisibleTo("Microsoft.ML.Tests" + PublicKey.TestValue)] -[assembly: InternalsVisibleTo("Microsoft.ML.Core.Tests" + PublicKey.TestValue)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.StaticPipe" + PublicKey.Value)] + +[assembly: WantsToBeBestFriends] diff --git a/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs b/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs index 35f64309d6..fbcc59d7cb 100644 --- a/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs +++ b/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs @@ -44,10 +44,10 @@ public enum InitAlgorithm KMeansParallel = 2 } + [BestFriend] internal static class Defaults{ - /// The number of clusters. - internal const int K = 5; + public const int K = 5; } public class Arguments : UnsupervisedLearnerInputBaseWithWeight diff --git a/src/Microsoft.ML.KMeansClustering/Properties/AssemblyInfo.cs b/src/Microsoft.ML.KMeansClustering/Properties/AssemblyInfo.cs new file mode 100644 index 0000000000..4cfdbca7bb --- /dev/null +++ b/src/Microsoft.ML.KMeansClustering/Properties/AssemblyInfo.cs @@ -0,0 +1,11 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Runtime.CompilerServices; +using Microsoft.ML; + +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Tests" + PublicKey.TestValue)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.StaticPipe" + PublicKey.Value)] + +[assembly: WantsToBeBestFriends] diff --git a/src/Microsoft.ML.Legacy/AssemblyRegistration.cs b/src/Microsoft.ML.Legacy/AssemblyRegistration.cs deleted file mode 100644 index 72e2debc77..0000000000 --- a/src/Microsoft.ML.Legacy/AssemblyRegistration.cs +++ /dev/null @@ -1,63 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. 
- -using System; -using System.Reflection; -using Microsoft.ML.Data; -using Microsoft.ML.Ensemble; -using Microsoft.ML.Sweeper; -using Microsoft.ML.Tools; -using Microsoft.ML.Trainers.FastTree; -using Microsoft.ML.Trainers.KMeans; -using Microsoft.ML.Trainers.PCA; -using Microsoft.ML.Transforms.Categorical; - -namespace Microsoft.ML -{ - internal static class AssemblyRegistration - { - private static readonly Lazy _assemblyInitializer = new Lazy(LoadStandardAssemblies); - - public static void RegisterAssemblies(IHostEnvironment environment) - { - // ensure all the assemblies in the Microsoft.ML package have been loaded - if (!_assemblyInitializer.IsValueCreated) - { - _ = _assemblyInitializer.Value; - Contracts.Assert(_assemblyInitializer.Value); - } - -#pragma warning disable CS0618 // The legacy API that internally uses dependency injection for all calls will be deleted anyway. - AssemblyLoadingUtils.RegisterCurrentLoadedAssemblies(environment); -#pragma warning restore CS0618 - } - - /// - /// Loads all the assemblies in the Microsoft.ML package that contain components. - /// - private static bool LoadStandardAssemblies() - { - Assembly dataAssembly = typeof(TextLoader).Assembly; // ML.Data - AssemblyName dataAssemblyName = dataAssembly.GetName(); - - _ = typeof(EnsembleModelParameters).Assembly; // ML.Ensemble - _ = typeof(FastTreeBinaryModelParameters).Assembly; // ML.FastTree - _ = typeof(KMeansModelParameters).Assembly; // ML.KMeansClustering - _ = typeof(Maml).Assembly; // ML.Maml - _ = typeof(PcaModelParameters).Assembly; // ML.PCA - _ = typeof(SweepCommand).Assembly; // ML.Sweeper - _ = typeof(OneHotEncodingTransformer).Assembly; // ML.Transforms - - // The following assemblies reference this assembly, so we can't directly reference them - //_ = typeof(Microsoft.ML.Data.LinearPredictor).Assembly); // ML.StandardLearners - _ = Assembly.Load(new AssemblyName() - { - Name = "Microsoft.ML.StandardLearners", - Version = dataAssemblyName.Version, //assume the same version as ML.Data - }); - - return true; - } - } -} diff --git a/src/Microsoft.ML.Legacy/CSharpApi.cs b/src/Microsoft.ML.Legacy/CSharpApi.cs deleted file mode 100644 index b607908914..0000000000 --- a/src/Microsoft.ML.Legacy/CSharpApi.cs +++ /dev/null @@ -1,23166 +0,0 @@ -//------------------------------------------------------------------------------ -// -// This code was generated by a tool. -// -// Changes to this file may cause incorrect behavior and will be lost if -// the code is regenerated. 
-// -//------------------------------------------------------------------------------ -#pragma warning disable -using System.Collections.Generic; -using Microsoft.ML; -using Microsoft.ML.Data; -using Microsoft.ML.EntryPoints; -using Newtonsoft.Json; -using System; -using System.Linq; -using Microsoft.ML.CommandLine; - -namespace Microsoft.ML -{ - public sealed partial class Experiment - { - [Obsolete] - public Microsoft.ML.Legacy.Data.CustomTextLoader.Output Add(Microsoft.ML.Legacy.Data.CustomTextLoader input) - { - var output = new Microsoft.ML.Legacy.Data.CustomTextLoader.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Data.CustomTextLoader input, Microsoft.ML.Legacy.Data.CustomTextLoader.Output output) - { - _jsonNodes.Add(Serialize("Data.CustomTextLoader", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Data.DataViewReference.Output Add(Microsoft.ML.Legacy.Data.DataViewReference input) - { - var output = new Microsoft.ML.Legacy.Data.DataViewReference.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Data.DataViewReference input, Microsoft.ML.Legacy.Data.DataViewReference.Output output) - { - _jsonNodes.Add(Serialize("Data.DataViewReference", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Data.IDataViewArrayConverter.Output Add(Microsoft.ML.Legacy.Data.IDataViewArrayConverter input) - { - var output = new Microsoft.ML.Legacy.Data.IDataViewArrayConverter.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Data.IDataViewArrayConverter input, Microsoft.ML.Legacy.Data.IDataViewArrayConverter.Output output) - { - _jsonNodes.Add(Serialize("Data.IDataViewArrayConverter", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Data.PredictorModelArrayConverter.Output Add(Microsoft.ML.Legacy.Data.PredictorModelArrayConverter input) - { - var output = new Microsoft.ML.Legacy.Data.PredictorModelArrayConverter.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Data.PredictorModelArrayConverter input, Microsoft.ML.Legacy.Data.PredictorModelArrayConverter.Output output) - { - _jsonNodes.Add(Serialize("Data.PredictorModelArrayConverter", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Data.TextLoader.Output Add(Microsoft.ML.Legacy.Data.TextLoader input) - { - var output = new Microsoft.ML.Legacy.Data.TextLoader.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Data.TextLoader input, Microsoft.ML.Legacy.Data.TextLoader.Output output) - { - _jsonNodes.Add(Serialize("Data.TextLoader", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Models.AnomalyDetectionEvaluator.Output Add(Microsoft.ML.Legacy.Models.AnomalyDetectionEvaluator input) - { - var output = new Microsoft.ML.Legacy.Models.AnomalyDetectionEvaluator.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Models.AnomalyDetectionEvaluator input, Microsoft.ML.Legacy.Models.AnomalyDetectionEvaluator.Output output) - { - _jsonNodes.Add(Serialize("Models.AnomalyDetectionEvaluator", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Models.AnomalyPipelineEnsemble.Output Add(Microsoft.ML.Legacy.Models.AnomalyPipelineEnsemble input) - { - var output = new Microsoft.ML.Legacy.Models.AnomalyPipelineEnsemble.Output(); - Add(input, output); - 
return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Models.AnomalyPipelineEnsemble input, Microsoft.ML.Legacy.Models.AnomalyPipelineEnsemble.Output output) - { - _jsonNodes.Add(Serialize("Models.AnomalyPipelineEnsemble", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Models.BinaryClassificationEvaluator.Output Add(Microsoft.ML.Legacy.Models.BinaryClassificationEvaluator input) - { - var output = new Microsoft.ML.Legacy.Models.BinaryClassificationEvaluator.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Models.BinaryClassificationEvaluator input, Microsoft.ML.Legacy.Models.BinaryClassificationEvaluator.Output output) - { - _jsonNodes.Add(Serialize("Models.BinaryClassificationEvaluator", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Models.BinaryEnsemble.Output Add(Microsoft.ML.Legacy.Models.BinaryEnsemble input) - { - var output = new Microsoft.ML.Legacy.Models.BinaryEnsemble.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Models.BinaryEnsemble input, Microsoft.ML.Legacy.Models.BinaryEnsemble.Output output) - { - _jsonNodes.Add(Serialize("Models.BinaryEnsemble", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Models.BinaryPipelineEnsemble.Output Add(Microsoft.ML.Legacy.Models.BinaryPipelineEnsemble input) - { - var output = new Microsoft.ML.Legacy.Models.BinaryPipelineEnsemble.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Models.BinaryPipelineEnsemble input, Microsoft.ML.Legacy.Models.BinaryPipelineEnsemble.Output output) - { - _jsonNodes.Add(Serialize("Models.BinaryPipelineEnsemble", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Models.ClassificationEvaluator.Output Add(Microsoft.ML.Legacy.Models.ClassificationEvaluator input) - { - var output = new Microsoft.ML.Legacy.Models.ClassificationEvaluator.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Models.ClassificationEvaluator input, Microsoft.ML.Legacy.Models.ClassificationEvaluator.Output output) - { - _jsonNodes.Add(Serialize("Models.ClassificationEvaluator", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Models.ClusterEvaluator.Output Add(Microsoft.ML.Legacy.Models.ClusterEvaluator input) - { - var output = new Microsoft.ML.Legacy.Models.ClusterEvaluator.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Models.ClusterEvaluator input, Microsoft.ML.Legacy.Models.ClusterEvaluator.Output output) - { - _jsonNodes.Add(Serialize("Models.ClusterEvaluator", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Models.CrossValidationResultsCombiner.Output Add(Microsoft.ML.Legacy.Models.CrossValidationResultsCombiner input) - { - var output = new Microsoft.ML.Legacy.Models.CrossValidationResultsCombiner.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Models.CrossValidationResultsCombiner input, Microsoft.ML.Legacy.Models.CrossValidationResultsCombiner.Output output) - { - _jsonNodes.Add(Serialize("Models.CrossValidationResultsCombiner", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Models.CrossValidator.Output Add(Microsoft.ML.Legacy.Models.CrossValidator input) - { - var output = new Microsoft.ML.Legacy.Models.CrossValidator.Output(); - Add(input, output); - return 
output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Models.CrossValidator input, Microsoft.ML.Legacy.Models.CrossValidator.Output output) - { - _jsonNodes.Add(Serialize("Models.CrossValidator", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Models.CrossValidatorDatasetSplitter.Output Add(Microsoft.ML.Legacy.Models.CrossValidatorDatasetSplitter input) - { - var output = new Microsoft.ML.Legacy.Models.CrossValidatorDatasetSplitter.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Models.CrossValidatorDatasetSplitter input, Microsoft.ML.Legacy.Models.CrossValidatorDatasetSplitter.Output output) - { - _jsonNodes.Add(Serialize("Models.CrossValidatorDatasetSplitter", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Models.DatasetTransformer.Output Add(Microsoft.ML.Legacy.Models.DatasetTransformer input) - { - var output = new Microsoft.ML.Legacy.Models.DatasetTransformer.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Models.DatasetTransformer input, Microsoft.ML.Legacy.Models.DatasetTransformer.Output output) - { - _jsonNodes.Add(Serialize("Models.DatasetTransformer", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Models.EnsembleSummary.Output Add(Microsoft.ML.Legacy.Models.EnsembleSummary input) - { - var output = new Microsoft.ML.Legacy.Models.EnsembleSummary.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Models.EnsembleSummary input, Microsoft.ML.Legacy.Models.EnsembleSummary.Output output) - { - _jsonNodes.Add(Serialize("Models.EnsembleSummary", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Models.FixedPlattCalibrator.Output Add(Microsoft.ML.Legacy.Models.FixedPlattCalibrator input) - { - var output = new Microsoft.ML.Legacy.Models.FixedPlattCalibrator.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Models.FixedPlattCalibrator input, Microsoft.ML.Legacy.Models.FixedPlattCalibrator.Output output) - { - _jsonNodes.Add(Serialize("Models.FixedPlattCalibrator", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Models.MultiClassPipelineEnsemble.Output Add(Microsoft.ML.Legacy.Models.MultiClassPipelineEnsemble input) - { - var output = new Microsoft.ML.Legacy.Models.MultiClassPipelineEnsemble.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Models.MultiClassPipelineEnsemble input, Microsoft.ML.Legacy.Models.MultiClassPipelineEnsemble.Output output) - { - _jsonNodes.Add(Serialize("Models.MultiClassPipelineEnsemble", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Models.MultiOutputRegressionEvaluator.Output Add(Microsoft.ML.Legacy.Models.MultiOutputRegressionEvaluator input) - { - var output = new Microsoft.ML.Legacy.Models.MultiOutputRegressionEvaluator.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Models.MultiOutputRegressionEvaluator input, Microsoft.ML.Legacy.Models.MultiOutputRegressionEvaluator.Output output) - { - _jsonNodes.Add(Serialize("Models.MultiOutputRegressionEvaluator", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Models.NaiveCalibrator.Output Add(Microsoft.ML.Legacy.Models.NaiveCalibrator input) - { - var output = new Microsoft.ML.Legacy.Models.NaiveCalibrator.Output(); - Add(input, output); - return output; 
- } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Models.NaiveCalibrator input, Microsoft.ML.Legacy.Models.NaiveCalibrator.Output output) - { - _jsonNodes.Add(Serialize("Models.NaiveCalibrator", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Models.OneVersusAll.Output Add(Microsoft.ML.Legacy.Models.OneVersusAll input) - { - var output = new Microsoft.ML.Legacy.Models.OneVersusAll.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Models.OneVersusAll input, Microsoft.ML.Legacy.Models.OneVersusAll.Output output) - { - _jsonNodes.Add(Serialize("Models.OneVersusAll", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Models.OnnxConverter.Output Add(Microsoft.ML.Legacy.Models.OnnxConverter input) - { - var output = new Microsoft.ML.Legacy.Models.OnnxConverter.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Models.OnnxConverter input, Microsoft.ML.Legacy.Models.OnnxConverter.Output output) - { - _jsonNodes.Add(Serialize("Models.OnnxConverter", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Models.OvaModelCombiner.Output Add(Microsoft.ML.Legacy.Models.OvaModelCombiner input) - { - var output = new Microsoft.ML.Legacy.Models.OvaModelCombiner.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Models.OvaModelCombiner input, Microsoft.ML.Legacy.Models.OvaModelCombiner.Output output) - { - _jsonNodes.Add(Serialize("Models.OvaModelCombiner", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Models.PAVCalibrator.Output Add(Microsoft.ML.Legacy.Models.PAVCalibrator input) - { - var output = new Microsoft.ML.Legacy.Models.PAVCalibrator.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Models.PAVCalibrator input, Microsoft.ML.Legacy.Models.PAVCalibrator.Output output) - { - _jsonNodes.Add(Serialize("Models.PAVCalibrator", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Models.PlattCalibrator.Output Add(Microsoft.ML.Legacy.Models.PlattCalibrator input) - { - var output = new Microsoft.ML.Legacy.Models.PlattCalibrator.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Models.PlattCalibrator input, Microsoft.ML.Legacy.Models.PlattCalibrator.Output output) - { - _jsonNodes.Add(Serialize("Models.PlattCalibrator", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Models.QuantileRegressionEvaluator.Output Add(Microsoft.ML.Legacy.Models.QuantileRegressionEvaluator input) - { - var output = new Microsoft.ML.Legacy.Models.QuantileRegressionEvaluator.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Models.QuantileRegressionEvaluator input, Microsoft.ML.Legacy.Models.QuantileRegressionEvaluator.Output output) - { - _jsonNodes.Add(Serialize("Models.QuantileRegressionEvaluator", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Models.RankerEvaluator.Output Add(Microsoft.ML.Legacy.Models.RankerEvaluator input) - { - var output = new Microsoft.ML.Legacy.Models.RankerEvaluator.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Models.RankerEvaluator input, Microsoft.ML.Legacy.Models.RankerEvaluator.Output output) - { - _jsonNodes.Add(Serialize("Models.RankerEvaluator", input, output)); - } - - [Obsolete] - public 
Microsoft.ML.Legacy.Models.RegressionEnsemble.Output Add(Microsoft.ML.Legacy.Models.RegressionEnsemble input) - { - var output = new Microsoft.ML.Legacy.Models.RegressionEnsemble.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Models.RegressionEnsemble input, Microsoft.ML.Legacy.Models.RegressionEnsemble.Output output) - { - _jsonNodes.Add(Serialize("Models.RegressionEnsemble", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Models.RegressionEvaluator.Output Add(Microsoft.ML.Legacy.Models.RegressionEvaluator input) - { - var output = new Microsoft.ML.Legacy.Models.RegressionEvaluator.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Models.RegressionEvaluator input, Microsoft.ML.Legacy.Models.RegressionEvaluator.Output output) - { - _jsonNodes.Add(Serialize("Models.RegressionEvaluator", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Models.RegressionPipelineEnsemble.Output Add(Microsoft.ML.Legacy.Models.RegressionPipelineEnsemble input) - { - var output = new Microsoft.ML.Legacy.Models.RegressionPipelineEnsemble.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Models.RegressionPipelineEnsemble input, Microsoft.ML.Legacy.Models.RegressionPipelineEnsemble.Output output) - { - _jsonNodes.Add(Serialize("Models.RegressionPipelineEnsemble", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Models.Summarizer.Output Add(Microsoft.ML.Legacy.Models.Summarizer input) - { - var output = new Microsoft.ML.Legacy.Models.Summarizer.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Models.Summarizer input, Microsoft.ML.Legacy.Models.Summarizer.Output output) - { - _jsonNodes.Add(Serialize("Models.Summarizer", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Models.TrainTestEvaluator.Output Add(Microsoft.ML.Legacy.Models.TrainTestEvaluator input) - { - var output = new Microsoft.ML.Legacy.Models.TrainTestEvaluator.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Models.TrainTestEvaluator input, Microsoft.ML.Legacy.Models.TrainTestEvaluator.Output output) - { - _jsonNodes.Add(Serialize("Models.TrainTestEvaluator", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.ExponentialAverage.Output Add(Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.ExponentialAverage input) - { - var output = new Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.ExponentialAverage.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.ExponentialAverage input, Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.ExponentialAverage.Output output) - { - _jsonNodes.Add(Serialize("TimeSeriesProcessingEntryPoints.ExponentialAverage", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.IidChangePointDetector.Output Add(Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.IidChangePointDetector input) - { - var output = new Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.IidChangePointDetector.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.IidChangePointDetector input, 
Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.IidChangePointDetector.Output output) - { - _jsonNodes.Add(Serialize("TimeSeriesProcessingEntryPoints.IidChangePointDetector", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.IidSpikeDetector.Output Add(Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.IidSpikeDetector input) - { - var output = new Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.IidSpikeDetector.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.IidSpikeDetector input, Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.IidSpikeDetector.Output output) - { - _jsonNodes.Add(Serialize("TimeSeriesProcessingEntryPoints.IidSpikeDetector", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.PercentileThresholdTransform.Output Add(Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.PercentileThresholdTransform input) - { - var output = new Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.PercentileThresholdTransform.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.PercentileThresholdTransform input, Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.PercentileThresholdTransform.Output output) - { - _jsonNodes.Add(Serialize("TimeSeriesProcessingEntryPoints.PercentileThresholdTransform", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.PValueTransform.Output Add(Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.PValueTransform input) - { - var output = new Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.PValueTransform.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.PValueTransform input, Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.PValueTransform.Output output) - { - _jsonNodes.Add(Serialize("TimeSeriesProcessingEntryPoints.PValueTransform", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.SlidingWindowTransform.Output Add(Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.SlidingWindowTransform input) - { - var output = new Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.SlidingWindowTransform.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.SlidingWindowTransform input, Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.SlidingWindowTransform.Output output) - { - _jsonNodes.Add(Serialize("TimeSeriesProcessingEntryPoints.SlidingWindowTransform", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.SsaChangePointDetector.Output Add(Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.SsaChangePointDetector input) - { - var output = new Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.SsaChangePointDetector.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.SsaChangePointDetector input, Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.SsaChangePointDetector.Output output) - { - _jsonNodes.Add(Serialize("TimeSeriesProcessingEntryPoints.SsaChangePointDetector", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.SsaSpikeDetector.Output 
Add(Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.SsaSpikeDetector input) - { - var output = new Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.SsaSpikeDetector.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.SsaSpikeDetector input, Microsoft.ML.Legacy.TimeSeriesProcessingEntryPoints.SsaSpikeDetector.Output output) - { - _jsonNodes.Add(Serialize("TimeSeriesProcessingEntryPoints.SsaSpikeDetector", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Trainers.AveragedPerceptronBinaryClassifier.Output Add(Microsoft.ML.Legacy.Trainers.AveragedPerceptronBinaryClassifier input) - { - var output = new Microsoft.ML.Legacy.Trainers.AveragedPerceptronBinaryClassifier.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Trainers.AveragedPerceptronBinaryClassifier input, Microsoft.ML.Legacy.Trainers.AveragedPerceptronBinaryClassifier.Output output) - { - _jsonNodes.Add(Serialize("Trainers.AveragedPerceptronBinaryClassifier", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Trainers.EnsembleBinaryClassifier.Output Add(Microsoft.ML.Legacy.Trainers.EnsembleBinaryClassifier input) - { - var output = new Microsoft.ML.Legacy.Trainers.EnsembleBinaryClassifier.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Trainers.EnsembleBinaryClassifier input, Microsoft.ML.Legacy.Trainers.EnsembleBinaryClassifier.Output output) - { - _jsonNodes.Add(Serialize("Trainers.EnsembleBinaryClassifier", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Trainers.EnsembleClassification.Output Add(Microsoft.ML.Legacy.Trainers.EnsembleClassification input) - { - var output = new Microsoft.ML.Legacy.Trainers.EnsembleClassification.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Trainers.EnsembleClassification input, Microsoft.ML.Legacy.Trainers.EnsembleClassification.Output output) - { - _jsonNodes.Add(Serialize("Trainers.EnsembleClassification", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Trainers.EnsembleRegression.Output Add(Microsoft.ML.Legacy.Trainers.EnsembleRegression input) - { - var output = new Microsoft.ML.Legacy.Trainers.EnsembleRegression.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Trainers.EnsembleRegression input, Microsoft.ML.Legacy.Trainers.EnsembleRegression.Output output) - { - _jsonNodes.Add(Serialize("Trainers.EnsembleRegression", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Trainers.FastForestBinaryClassifier.Output Add(Microsoft.ML.Legacy.Trainers.FastForestBinaryClassifier input) - { - var output = new Microsoft.ML.Legacy.Trainers.FastForestBinaryClassifier.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Trainers.FastForestBinaryClassifier input, Microsoft.ML.Legacy.Trainers.FastForestBinaryClassifier.Output output) - { - _jsonNodes.Add(Serialize("Trainers.FastForestBinaryClassifier", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Trainers.FastForestRegressor.Output Add(Microsoft.ML.Legacy.Trainers.FastForestRegressor input) - { - var output = new Microsoft.ML.Legacy.Trainers.FastForestRegressor.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Trainers.FastForestRegressor 
input, Microsoft.ML.Legacy.Trainers.FastForestRegressor.Output output) - { - _jsonNodes.Add(Serialize("Trainers.FastForestRegressor", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Trainers.FastTreeBinaryClassifier.Output Add(Microsoft.ML.Legacy.Trainers.FastTreeBinaryClassifier input) - { - var output = new Microsoft.ML.Legacy.Trainers.FastTreeBinaryClassifier.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Trainers.FastTreeBinaryClassifier input, Microsoft.ML.Legacy.Trainers.FastTreeBinaryClassifier.Output output) - { - _jsonNodes.Add(Serialize("Trainers.FastTreeBinaryClassifier", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Trainers.FastTreeRanker.Output Add(Microsoft.ML.Legacy.Trainers.FastTreeRanker input) - { - var output = new Microsoft.ML.Legacy.Trainers.FastTreeRanker.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Trainers.FastTreeRanker input, Microsoft.ML.Legacy.Trainers.FastTreeRanker.Output output) - { - _jsonNodes.Add(Serialize("Trainers.FastTreeRanker", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Trainers.FastTreeRegressor.Output Add(Microsoft.ML.Legacy.Trainers.FastTreeRegressor input) - { - var output = new Microsoft.ML.Legacy.Trainers.FastTreeRegressor.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Trainers.FastTreeRegressor input, Microsoft.ML.Legacy.Trainers.FastTreeRegressor.Output output) - { - _jsonNodes.Add(Serialize("Trainers.FastTreeRegressor", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Trainers.FastTreeTweedieRegressor.Output Add(Microsoft.ML.Legacy.Trainers.FastTreeTweedieRegressor input) - { - var output = new Microsoft.ML.Legacy.Trainers.FastTreeTweedieRegressor.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Trainers.FastTreeTweedieRegressor input, Microsoft.ML.Legacy.Trainers.FastTreeTweedieRegressor.Output output) - { - _jsonNodes.Add(Serialize("Trainers.FastTreeTweedieRegressor", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Trainers.FieldAwareFactorizationMachineBinaryClassifier.Output Add(Microsoft.ML.Legacy.Trainers.FieldAwareFactorizationMachineBinaryClassifier input) - { - var output = new Microsoft.ML.Legacy.Trainers.FieldAwareFactorizationMachineBinaryClassifier.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Trainers.FieldAwareFactorizationMachineBinaryClassifier input, Microsoft.ML.Legacy.Trainers.FieldAwareFactorizationMachineBinaryClassifier.Output output) - { - _jsonNodes.Add(Serialize("Trainers.FieldAwareFactorizationMachineBinaryClassifier", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Trainers.GeneralizedAdditiveModelBinaryClassifier.Output Add(Microsoft.ML.Legacy.Trainers.GeneralizedAdditiveModelBinaryClassifier input) - { - var output = new Microsoft.ML.Legacy.Trainers.GeneralizedAdditiveModelBinaryClassifier.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Trainers.GeneralizedAdditiveModelBinaryClassifier input, Microsoft.ML.Legacy.Trainers.GeneralizedAdditiveModelBinaryClassifier.Output output) - { - _jsonNodes.Add(Serialize("Trainers.GeneralizedAdditiveModelBinaryClassifier", input, output)); - } - - [Obsolete] - public 
Microsoft.ML.Legacy.Trainers.GeneralizedAdditiveModelRegressor.Output Add(Microsoft.ML.Legacy.Trainers.GeneralizedAdditiveModelRegressor input) - { - var output = new Microsoft.ML.Legacy.Trainers.GeneralizedAdditiveModelRegressor.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Trainers.GeneralizedAdditiveModelRegressor input, Microsoft.ML.Legacy.Trainers.GeneralizedAdditiveModelRegressor.Output output) - { - _jsonNodes.Add(Serialize("Trainers.GeneralizedAdditiveModelRegressor", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Trainers.KMeansPlusPlusClusterer.Output Add(Microsoft.ML.Legacy.Trainers.KMeansPlusPlusClusterer input) - { - var output = new Microsoft.ML.Legacy.Trainers.KMeansPlusPlusClusterer.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Trainers.KMeansPlusPlusClusterer input, Microsoft.ML.Legacy.Trainers.KMeansPlusPlusClusterer.Output output) - { - _jsonNodes.Add(Serialize("Trainers.KMeansPlusPlusClusterer", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Trainers.LightGbmBinaryClassifier.Output Add(Microsoft.ML.Legacy.Trainers.LightGbmBinaryClassifier input) - { - var output = new Microsoft.ML.Legacy.Trainers.LightGbmBinaryClassifier.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Trainers.LightGbmBinaryClassifier input, Microsoft.ML.Legacy.Trainers.LightGbmBinaryClassifier.Output output) - { - _jsonNodes.Add(Serialize("Trainers.LightGbmBinaryClassifier", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Trainers.LightGbmClassifier.Output Add(Microsoft.ML.Legacy.Trainers.LightGbmClassifier input) - { - var output = new Microsoft.ML.Legacy.Trainers.LightGbmClassifier.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Trainers.LightGbmClassifier input, Microsoft.ML.Legacy.Trainers.LightGbmClassifier.Output output) - { - _jsonNodes.Add(Serialize("Trainers.LightGbmClassifier", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Trainers.LightGbmRanker.Output Add(Microsoft.ML.Legacy.Trainers.LightGbmRanker input) - { - var output = new Microsoft.ML.Legacy.Trainers.LightGbmRanker.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Trainers.LightGbmRanker input, Microsoft.ML.Legacy.Trainers.LightGbmRanker.Output output) - { - _jsonNodes.Add(Serialize("Trainers.LightGbmRanker", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Trainers.LightGbmRegressor.Output Add(Microsoft.ML.Legacy.Trainers.LightGbmRegressor input) - { - var output = new Microsoft.ML.Legacy.Trainers.LightGbmRegressor.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Trainers.LightGbmRegressor input, Microsoft.ML.Legacy.Trainers.LightGbmRegressor.Output output) - { - _jsonNodes.Add(Serialize("Trainers.LightGbmRegressor", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Trainers.LinearSvmBinaryClassifier.Output Add(Microsoft.ML.Legacy.Trainers.LinearSvmBinaryClassifier input) - { - var output = new Microsoft.ML.Legacy.Trainers.LinearSvmBinaryClassifier.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Trainers.LinearSvmBinaryClassifier input, Microsoft.ML.Legacy.Trainers.LinearSvmBinaryClassifier.Output output) - { - 
_jsonNodes.Add(Serialize("Trainers.LinearSvmBinaryClassifier", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Trainers.LogisticRegressionBinaryClassifier.Output Add(Microsoft.ML.Legacy.Trainers.LogisticRegressionBinaryClassifier input) - { - var output = new Microsoft.ML.Legacy.Trainers.LogisticRegressionBinaryClassifier.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Trainers.LogisticRegressionBinaryClassifier input, Microsoft.ML.Legacy.Trainers.LogisticRegressionBinaryClassifier.Output output) - { - _jsonNodes.Add(Serialize("Trainers.LogisticRegressionBinaryClassifier", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Trainers.LogisticRegressionClassifier.Output Add(Microsoft.ML.Legacy.Trainers.LogisticRegressionClassifier input) - { - var output = new Microsoft.ML.Legacy.Trainers.LogisticRegressionClassifier.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Trainers.LogisticRegressionClassifier input, Microsoft.ML.Legacy.Trainers.LogisticRegressionClassifier.Output output) - { - _jsonNodes.Add(Serialize("Trainers.LogisticRegressionClassifier", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Trainers.NaiveBayesClassifier.Output Add(Microsoft.ML.Legacy.Trainers.NaiveBayesClassifier input) - { - var output = new Microsoft.ML.Legacy.Trainers.NaiveBayesClassifier.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Trainers.NaiveBayesClassifier input, Microsoft.ML.Legacy.Trainers.NaiveBayesClassifier.Output output) - { - _jsonNodes.Add(Serialize("Trainers.NaiveBayesClassifier", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Trainers.OnlineGradientDescentRegressor.Output Add(Microsoft.ML.Legacy.Trainers.OnlineGradientDescentRegressor input) - { - var output = new Microsoft.ML.Legacy.Trainers.OnlineGradientDescentRegressor.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Trainers.OnlineGradientDescentRegressor input, Microsoft.ML.Legacy.Trainers.OnlineGradientDescentRegressor.Output output) - { - _jsonNodes.Add(Serialize("Trainers.OnlineGradientDescentRegressor", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Trainers.OrdinaryLeastSquaresRegressor.Output Add(Microsoft.ML.Legacy.Trainers.OrdinaryLeastSquaresRegressor input) - { - var output = new Microsoft.ML.Legacy.Trainers.OrdinaryLeastSquaresRegressor.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Trainers.OrdinaryLeastSquaresRegressor input, Microsoft.ML.Legacy.Trainers.OrdinaryLeastSquaresRegressor.Output output) - { - _jsonNodes.Add(Serialize("Trainers.OrdinaryLeastSquaresRegressor", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Trainers.PcaAnomalyDetector.Output Add(Microsoft.ML.Legacy.Trainers.PcaAnomalyDetector input) - { - var output = new Microsoft.ML.Legacy.Trainers.PcaAnomalyDetector.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Trainers.PcaAnomalyDetector input, Microsoft.ML.Legacy.Trainers.PcaAnomalyDetector.Output output) - { - _jsonNodes.Add(Serialize("Trainers.PcaAnomalyDetector", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Trainers.PoissonRegressor.Output Add(Microsoft.ML.Legacy.Trainers.PoissonRegressor input) - { - var output = new 
Microsoft.ML.Legacy.Trainers.PoissonRegressor.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Trainers.PoissonRegressor input, Microsoft.ML.Legacy.Trainers.PoissonRegressor.Output output) - { - _jsonNodes.Add(Serialize("Trainers.PoissonRegressor", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Trainers.StochasticDualCoordinateAscentBinaryClassifier.Output Add(Microsoft.ML.Legacy.Trainers.StochasticDualCoordinateAscentBinaryClassifier input) - { - var output = new Microsoft.ML.Legacy.Trainers.StochasticDualCoordinateAscentBinaryClassifier.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Trainers.StochasticDualCoordinateAscentBinaryClassifier input, Microsoft.ML.Legacy.Trainers.StochasticDualCoordinateAscentBinaryClassifier.Output output) - { - _jsonNodes.Add(Serialize("Trainers.StochasticDualCoordinateAscentBinaryClassifier", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Trainers.StochasticDualCoordinateAscentClassifier.Output Add(Microsoft.ML.Legacy.Trainers.StochasticDualCoordinateAscentClassifier input) - { - var output = new Microsoft.ML.Legacy.Trainers.StochasticDualCoordinateAscentClassifier.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Trainers.StochasticDualCoordinateAscentClassifier input, Microsoft.ML.Legacy.Trainers.StochasticDualCoordinateAscentClassifier.Output output) - { - _jsonNodes.Add(Serialize("Trainers.StochasticDualCoordinateAscentClassifier", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Trainers.StochasticDualCoordinateAscentRegressor.Output Add(Microsoft.ML.Legacy.Trainers.StochasticDualCoordinateAscentRegressor input) - { - var output = new Microsoft.ML.Legacy.Trainers.StochasticDualCoordinateAscentRegressor.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Trainers.StochasticDualCoordinateAscentRegressor input, Microsoft.ML.Legacy.Trainers.StochasticDualCoordinateAscentRegressor.Output output) - { - _jsonNodes.Add(Serialize("Trainers.StochasticDualCoordinateAscentRegressor", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Trainers.StochasticGradientDescentBinaryClassifier.Output Add(Microsoft.ML.Legacy.Trainers.StochasticGradientDescentBinaryClassifier input) - { - var output = new Microsoft.ML.Legacy.Trainers.StochasticGradientDescentBinaryClassifier.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Trainers.StochasticGradientDescentBinaryClassifier input, Microsoft.ML.Legacy.Trainers.StochasticGradientDescentBinaryClassifier.Output output) - { - _jsonNodes.Add(Serialize("Trainers.StochasticGradientDescentBinaryClassifier", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Trainers.SymSgdBinaryClassifier.Output Add(Microsoft.ML.Legacy.Trainers.SymSgdBinaryClassifier input) - { - var output = new Microsoft.ML.Legacy.Trainers.SymSgdBinaryClassifier.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Trainers.SymSgdBinaryClassifier input, Microsoft.ML.Legacy.Trainers.SymSgdBinaryClassifier.Output output) - { - _jsonNodes.Add(Serialize("Trainers.SymSgdBinaryClassifier", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.ApproximateBootstrapSampler.Output Add(Microsoft.ML.Legacy.Transforms.ApproximateBootstrapSampler input) 
- { - var output = new Microsoft.ML.Legacy.Transforms.ApproximateBootstrapSampler.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.ApproximateBootstrapSampler input, Microsoft.ML.Legacy.Transforms.ApproximateBootstrapSampler.Output output) - { - _jsonNodes.Add(Serialize("Transforms.ApproximateBootstrapSampler", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.BinaryPredictionScoreColumnsRenamer.Output Add(Microsoft.ML.Legacy.Transforms.BinaryPredictionScoreColumnsRenamer input) - { - var output = new Microsoft.ML.Legacy.Transforms.BinaryPredictionScoreColumnsRenamer.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.BinaryPredictionScoreColumnsRenamer input, Microsoft.ML.Legacy.Transforms.BinaryPredictionScoreColumnsRenamer.Output output) - { - _jsonNodes.Add(Serialize("Transforms.BinaryPredictionScoreColumnsRenamer", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.BinNormalizer.Output Add(Microsoft.ML.Legacy.Transforms.BinNormalizer input) - { - var output = new Microsoft.ML.Legacy.Transforms.BinNormalizer.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.BinNormalizer input, Microsoft.ML.Legacy.Transforms.BinNormalizer.Output output) - { - _jsonNodes.Add(Serialize("Transforms.BinNormalizer", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.CategoricalHashOneHotVectorizer.Output Add(Microsoft.ML.Legacy.Transforms.CategoricalHashOneHotVectorizer input) - { - var output = new Microsoft.ML.Legacy.Transforms.CategoricalHashOneHotVectorizer.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.CategoricalHashOneHotVectorizer input, Microsoft.ML.Legacy.Transforms.CategoricalHashOneHotVectorizer.Output output) - { - _jsonNodes.Add(Serialize("Transforms.CategoricalHashOneHotVectorizer", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.CategoricalOneHotVectorizer.Output Add(Microsoft.ML.Legacy.Transforms.CategoricalOneHotVectorizer input) - { - var output = new Microsoft.ML.Legacy.Transforms.CategoricalOneHotVectorizer.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.CategoricalOneHotVectorizer input, Microsoft.ML.Legacy.Transforms.CategoricalOneHotVectorizer.Output output) - { - _jsonNodes.Add(Serialize("Transforms.CategoricalOneHotVectorizer", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.CharacterTokenizer.Output Add(Microsoft.ML.Legacy.Transforms.CharacterTokenizer input) - { - var output = new Microsoft.ML.Legacy.Transforms.CharacterTokenizer.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.CharacterTokenizer input, Microsoft.ML.Legacy.Transforms.CharacterTokenizer.Output output) - { - _jsonNodes.Add(Serialize("Transforms.CharacterTokenizer", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.ColumnConcatenator.Output Add(Microsoft.ML.Legacy.Transforms.ColumnConcatenator input) - { - var output = new Microsoft.ML.Legacy.Transforms.ColumnConcatenator.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.ColumnConcatenator input, 
Microsoft.ML.Legacy.Transforms.ColumnConcatenator.Output output) - { - _jsonNodes.Add(Serialize("Transforms.ColumnConcatenator", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.ColumnCopier.Output Add(Microsoft.ML.Legacy.Transforms.ColumnCopier input) - { - var output = new Microsoft.ML.Legacy.Transforms.ColumnCopier.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.ColumnCopier input, Microsoft.ML.Legacy.Transforms.ColumnCopier.Output output) - { - _jsonNodes.Add(Serialize("Transforms.ColumnCopier", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.ColumnSelector.Output Add(Microsoft.ML.Legacy.Transforms.ColumnSelector input) - { - var output = new Microsoft.ML.Legacy.Transforms.ColumnSelector.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.ColumnSelector input, Microsoft.ML.Legacy.Transforms.ColumnSelector.Output output) - { - _jsonNodes.Add(Serialize("Transforms.ColumnSelector", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.ColumnTypeConverter.Output Add(Microsoft.ML.Legacy.Transforms.ColumnTypeConverter input) - { - var output = new Microsoft.ML.Legacy.Transforms.ColumnTypeConverter.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.ColumnTypeConverter input, Microsoft.ML.Legacy.Transforms.ColumnTypeConverter.Output output) - { - _jsonNodes.Add(Serialize("Transforms.ColumnTypeConverter", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.CombinerByContiguousGroupId.Output Add(Microsoft.ML.Legacy.Transforms.CombinerByContiguousGroupId input) - { - var output = new Microsoft.ML.Legacy.Transforms.CombinerByContiguousGroupId.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.CombinerByContiguousGroupId input, Microsoft.ML.Legacy.Transforms.CombinerByContiguousGroupId.Output output) - { - _jsonNodes.Add(Serialize("Transforms.CombinerByContiguousGroupId", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.ConditionalNormalizer.Output Add(Microsoft.ML.Legacy.Transforms.ConditionalNormalizer input) - { - var output = new Microsoft.ML.Legacy.Transforms.ConditionalNormalizer.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.ConditionalNormalizer input, Microsoft.ML.Legacy.Transforms.ConditionalNormalizer.Output output) - { - _jsonNodes.Add(Serialize("Transforms.ConditionalNormalizer", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.DataCache.Output Add(Microsoft.ML.Legacy.Transforms.DataCache input) - { - var output = new Microsoft.ML.Legacy.Transforms.DataCache.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.DataCache input, Microsoft.ML.Legacy.Transforms.DataCache.Output output) - { - _jsonNodes.Add(Serialize("Transforms.DataCache", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.DatasetScorer.Output Add(Microsoft.ML.Legacy.Transforms.DatasetScorer input) - { - var output = new Microsoft.ML.Legacy.Transforms.DatasetScorer.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.DatasetScorer input, Microsoft.ML.Legacy.Transforms.DatasetScorer.Output 
output) - { - _jsonNodes.Add(Serialize("Transforms.DatasetScorer", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.DatasetTransformScorer.Output Add(Microsoft.ML.Legacy.Transforms.DatasetTransformScorer input) - { - var output = new Microsoft.ML.Legacy.Transforms.DatasetTransformScorer.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.DatasetTransformScorer input, Microsoft.ML.Legacy.Transforms.DatasetTransformScorer.Output output) - { - _jsonNodes.Add(Serialize("Transforms.DatasetTransformScorer", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.Dictionarizer.Output Add(Microsoft.ML.Legacy.Transforms.Dictionarizer input) - { - var output = new Microsoft.ML.Legacy.Transforms.Dictionarizer.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.Dictionarizer input, Microsoft.ML.Legacy.Transforms.Dictionarizer.Output output) - { - _jsonNodes.Add(Serialize("Transforms.Dictionarizer", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.FeatureCombiner.Output Add(Microsoft.ML.Legacy.Transforms.FeatureCombiner input) - { - var output = new Microsoft.ML.Legacy.Transforms.FeatureCombiner.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.FeatureCombiner input, Microsoft.ML.Legacy.Transforms.FeatureCombiner.Output output) - { - _jsonNodes.Add(Serialize("Transforms.FeatureCombiner", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.FeatureContributionCalculationTransformer.Output Add(Microsoft.ML.Legacy.Transforms.FeatureContributionCalculationTransformer input) - { - var output = new Microsoft.ML.Legacy.Transforms.FeatureContributionCalculationTransformer.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.FeatureContributionCalculationTransformer input, Microsoft.ML.Legacy.Transforms.FeatureContributionCalculationTransformer.Output output) - { - _jsonNodes.Add(Serialize("Transforms.FeatureContributionCalculationTransformer", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.FeatureSelectorByCount.Output Add(Microsoft.ML.Legacy.Transforms.FeatureSelectorByCount input) - { - var output = new Microsoft.ML.Legacy.Transforms.FeatureSelectorByCount.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.FeatureSelectorByCount input, Microsoft.ML.Legacy.Transforms.FeatureSelectorByCount.Output output) - { - _jsonNodes.Add(Serialize("Transforms.FeatureSelectorByCount", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.FeatureSelectorByMutualInformation.Output Add(Microsoft.ML.Legacy.Transforms.FeatureSelectorByMutualInformation input) - { - var output = new Microsoft.ML.Legacy.Transforms.FeatureSelectorByMutualInformation.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.FeatureSelectorByMutualInformation input, Microsoft.ML.Legacy.Transforms.FeatureSelectorByMutualInformation.Output output) - { - _jsonNodes.Add(Serialize("Transforms.FeatureSelectorByMutualInformation", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.GlobalContrastNormalizer.Output Add(Microsoft.ML.Legacy.Transforms.GlobalContrastNormalizer input) - { - var output = new 
Microsoft.ML.Legacy.Transforms.GlobalContrastNormalizer.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.GlobalContrastNormalizer input, Microsoft.ML.Legacy.Transforms.GlobalContrastNormalizer.Output output) - { - _jsonNodes.Add(Serialize("Transforms.GlobalContrastNormalizer", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.HashConverter.Output Add(Microsoft.ML.Legacy.Transforms.HashConverter input) - { - var output = new Microsoft.ML.Legacy.Transforms.HashConverter.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.HashConverter input, Microsoft.ML.Legacy.Transforms.HashConverter.Output output) - { - _jsonNodes.Add(Serialize("Transforms.HashConverter", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.ImageGrayscale.Output Add(Microsoft.ML.Legacy.Transforms.ImageGrayscale input) - { - var output = new Microsoft.ML.Legacy.Transforms.ImageGrayscale.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.ImageGrayscale input, Microsoft.ML.Legacy.Transforms.ImageGrayscale.Output output) - { - _jsonNodes.Add(Serialize("Transforms.ImageGrayscale", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.ImageLoader.Output Add(Microsoft.ML.Legacy.Transforms.ImageLoader input) - { - var output = new Microsoft.ML.Legacy.Transforms.ImageLoader.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.ImageLoader input, Microsoft.ML.Legacy.Transforms.ImageLoader.Output output) - { - _jsonNodes.Add(Serialize("Transforms.ImageLoader", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.ImagePixelExtractor.Output Add(Microsoft.ML.Legacy.Transforms.ImagePixelExtractor input) - { - var output = new Microsoft.ML.Legacy.Transforms.ImagePixelExtractor.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.ImagePixelExtractor input, Microsoft.ML.Legacy.Transforms.ImagePixelExtractor.Output output) - { - _jsonNodes.Add(Serialize("Transforms.ImagePixelExtractor", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.ImageResizer.Output Add(Microsoft.ML.Legacy.Transforms.ImageResizer input) - { - var output = new Microsoft.ML.Legacy.Transforms.ImageResizer.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.ImageResizer input, Microsoft.ML.Legacy.Transforms.ImageResizer.Output output) - { - _jsonNodes.Add(Serialize("Transforms.ImageResizer", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.KeyToTextConverter.Output Add(Microsoft.ML.Legacy.Transforms.KeyToTextConverter input) - { - var output = new Microsoft.ML.Legacy.Transforms.KeyToTextConverter.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.KeyToTextConverter input, Microsoft.ML.Legacy.Transforms.KeyToTextConverter.Output output) - { - _jsonNodes.Add(Serialize("Transforms.KeyToTextConverter", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.LabelColumnKeyBooleanConverter.Output Add(Microsoft.ML.Legacy.Transforms.LabelColumnKeyBooleanConverter input) - { - var output = new Microsoft.ML.Legacy.Transforms.LabelColumnKeyBooleanConverter.Output(); - 
Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.LabelColumnKeyBooleanConverter input, Microsoft.ML.Legacy.Transforms.LabelColumnKeyBooleanConverter.Output output) - { - _jsonNodes.Add(Serialize("Transforms.LabelColumnKeyBooleanConverter", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.LabelIndicator.Output Add(Microsoft.ML.Legacy.Transforms.LabelIndicator input) - { - var output = new Microsoft.ML.Legacy.Transforms.LabelIndicator.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.LabelIndicator input, Microsoft.ML.Legacy.Transforms.LabelIndicator.Output output) - { - _jsonNodes.Add(Serialize("Transforms.LabelIndicator", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.LabelToFloatConverter.Output Add(Microsoft.ML.Legacy.Transforms.LabelToFloatConverter input) - { - var output = new Microsoft.ML.Legacy.Transforms.LabelToFloatConverter.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.LabelToFloatConverter input, Microsoft.ML.Legacy.Transforms.LabelToFloatConverter.Output output) - { - _jsonNodes.Add(Serialize("Transforms.LabelToFloatConverter", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.LightLda.Output Add(Microsoft.ML.Legacy.Transforms.LightLda input) - { - var output = new Microsoft.ML.Legacy.Transforms.LightLda.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.LightLda input, Microsoft.ML.Legacy.Transforms.LightLda.Output output) - { - _jsonNodes.Add(Serialize("Transforms.LightLda", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.LogMeanVarianceNormalizer.Output Add(Microsoft.ML.Legacy.Transforms.LogMeanVarianceNormalizer input) - { - var output = new Microsoft.ML.Legacy.Transforms.LogMeanVarianceNormalizer.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.LogMeanVarianceNormalizer input, Microsoft.ML.Legacy.Transforms.LogMeanVarianceNormalizer.Output output) - { - _jsonNodes.Add(Serialize("Transforms.LogMeanVarianceNormalizer", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.LpNormalizer.Output Add(Microsoft.ML.Legacy.Transforms.LpNormalizer input) - { - var output = new Microsoft.ML.Legacy.Transforms.LpNormalizer.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.LpNormalizer input, Microsoft.ML.Legacy.Transforms.LpNormalizer.Output output) - { - _jsonNodes.Add(Serialize("Transforms.LpNormalizer", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.ManyHeterogeneousModelCombiner.Output Add(Microsoft.ML.Legacy.Transforms.ManyHeterogeneousModelCombiner input) - { - var output = new Microsoft.ML.Legacy.Transforms.ManyHeterogeneousModelCombiner.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.ManyHeterogeneousModelCombiner input, Microsoft.ML.Legacy.Transforms.ManyHeterogeneousModelCombiner.Output output) - { - _jsonNodes.Add(Serialize("Transforms.ManyHeterogeneousModelCombiner", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.MeanVarianceNormalizer.Output Add(Microsoft.ML.Legacy.Transforms.MeanVarianceNormalizer input) - { - var output = new 
Microsoft.ML.Legacy.Transforms.MeanVarianceNormalizer.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.MeanVarianceNormalizer input, Microsoft.ML.Legacy.Transforms.MeanVarianceNormalizer.Output output) - { - _jsonNodes.Add(Serialize("Transforms.MeanVarianceNormalizer", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.MinMaxNormalizer.Output Add(Microsoft.ML.Legacy.Transforms.MinMaxNormalizer input) - { - var output = new Microsoft.ML.Legacy.Transforms.MinMaxNormalizer.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.MinMaxNormalizer input, Microsoft.ML.Legacy.Transforms.MinMaxNormalizer.Output output) - { - _jsonNodes.Add(Serialize("Transforms.MinMaxNormalizer", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.MissingValueHandler.Output Add(Microsoft.ML.Legacy.Transforms.MissingValueHandler input) - { - var output = new Microsoft.ML.Legacy.Transforms.MissingValueHandler.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.MissingValueHandler input, Microsoft.ML.Legacy.Transforms.MissingValueHandler.Output output) - { - _jsonNodes.Add(Serialize("Transforms.MissingValueHandler", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.MissingValueIndicator.Output Add(Microsoft.ML.Legacy.Transforms.MissingValueIndicator input) - { - var output = new Microsoft.ML.Legacy.Transforms.MissingValueIndicator.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.MissingValueIndicator input, Microsoft.ML.Legacy.Transforms.MissingValueIndicator.Output output) - { - _jsonNodes.Add(Serialize("Transforms.MissingValueIndicator", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.MissingValuesDropper.Output Add(Microsoft.ML.Legacy.Transforms.MissingValuesDropper input) - { - var output = new Microsoft.ML.Legacy.Transforms.MissingValuesDropper.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.MissingValuesDropper input, Microsoft.ML.Legacy.Transforms.MissingValuesDropper.Output output) - { - _jsonNodes.Add(Serialize("Transforms.MissingValuesDropper", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.MissingValuesRowDropper.Output Add(Microsoft.ML.Legacy.Transforms.MissingValuesRowDropper input) - { - var output = new Microsoft.ML.Legacy.Transforms.MissingValuesRowDropper.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.MissingValuesRowDropper input, Microsoft.ML.Legacy.Transforms.MissingValuesRowDropper.Output output) - { - _jsonNodes.Add(Serialize("Transforms.MissingValuesRowDropper", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.MissingValueSubstitutor.Output Add(Microsoft.ML.Legacy.Transforms.MissingValueSubstitutor input) - { - var output = new Microsoft.ML.Legacy.Transforms.MissingValueSubstitutor.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.MissingValueSubstitutor input, Microsoft.ML.Legacy.Transforms.MissingValueSubstitutor.Output output) - { - _jsonNodes.Add(Serialize("Transforms.MissingValueSubstitutor", input, output)); - } - - [Obsolete] - public 
Microsoft.ML.Legacy.Transforms.ModelCombiner.Output Add(Microsoft.ML.Legacy.Transforms.ModelCombiner input) - { - var output = new Microsoft.ML.Legacy.Transforms.ModelCombiner.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.ModelCombiner input, Microsoft.ML.Legacy.Transforms.ModelCombiner.Output output) - { - _jsonNodes.Add(Serialize("Transforms.ModelCombiner", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.NGramTranslator.Output Add(Microsoft.ML.Legacy.Transforms.NGramTranslator input) - { - var output = new Microsoft.ML.Legacy.Transforms.NGramTranslator.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.NGramTranslator input, Microsoft.ML.Legacy.Transforms.NGramTranslator.Output output) - { - _jsonNodes.Add(Serialize("Transforms.NGramTranslator", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.NoOperation.Output Add(Microsoft.ML.Legacy.Transforms.NoOperation input) - { - var output = new Microsoft.ML.Legacy.Transforms.NoOperation.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.NoOperation input, Microsoft.ML.Legacy.Transforms.NoOperation.Output output) - { - _jsonNodes.Add(Serialize("Transforms.NoOperation", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.OptionalColumnCreator.Output Add(Microsoft.ML.Legacy.Transforms.OptionalColumnCreator input) - { - var output = new Microsoft.ML.Legacy.Transforms.OptionalColumnCreator.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.OptionalColumnCreator input, Microsoft.ML.Legacy.Transforms.OptionalColumnCreator.Output output) - { - _jsonNodes.Add(Serialize("Transforms.OptionalColumnCreator", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.PcaCalculator.Output Add(Microsoft.ML.Legacy.Transforms.PcaCalculator input) - { - var output = new Microsoft.ML.Legacy.Transforms.PcaCalculator.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.PcaCalculator input, Microsoft.ML.Legacy.Transforms.PcaCalculator.Output output) - { - _jsonNodes.Add(Serialize("Transforms.PcaCalculator", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.PredictedLabelColumnOriginalValueConverter.Output Add(Microsoft.ML.Legacy.Transforms.PredictedLabelColumnOriginalValueConverter input) - { - var output = new Microsoft.ML.Legacy.Transforms.PredictedLabelColumnOriginalValueConverter.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.PredictedLabelColumnOriginalValueConverter input, Microsoft.ML.Legacy.Transforms.PredictedLabelColumnOriginalValueConverter.Output output) - { - _jsonNodes.Add(Serialize("Transforms.PredictedLabelColumnOriginalValueConverter", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.RandomNumberGenerator.Output Add(Microsoft.ML.Legacy.Transforms.RandomNumberGenerator input) - { - var output = new Microsoft.ML.Legacy.Transforms.RandomNumberGenerator.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.RandomNumberGenerator input, Microsoft.ML.Legacy.Transforms.RandomNumberGenerator.Output output) - { - 
_jsonNodes.Add(Serialize("Transforms.RandomNumberGenerator", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.RowRangeFilter.Output Add(Microsoft.ML.Legacy.Transforms.RowRangeFilter input) - { - var output = new Microsoft.ML.Legacy.Transforms.RowRangeFilter.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.RowRangeFilter input, Microsoft.ML.Legacy.Transforms.RowRangeFilter.Output output) - { - _jsonNodes.Add(Serialize("Transforms.RowRangeFilter", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.RowSkipAndTakeFilter.Output Add(Microsoft.ML.Legacy.Transforms.RowSkipAndTakeFilter input) - { - var output = new Microsoft.ML.Legacy.Transforms.RowSkipAndTakeFilter.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.RowSkipAndTakeFilter input, Microsoft.ML.Legacy.Transforms.RowSkipAndTakeFilter.Output output) - { - _jsonNodes.Add(Serialize("Transforms.RowSkipAndTakeFilter", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.RowSkipFilter.Output Add(Microsoft.ML.Legacy.Transforms.RowSkipFilter input) - { - var output = new Microsoft.ML.Legacy.Transforms.RowSkipFilter.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.RowSkipFilter input, Microsoft.ML.Legacy.Transforms.RowSkipFilter.Output output) - { - _jsonNodes.Add(Serialize("Transforms.RowSkipFilter", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.RowTakeFilter.Output Add(Microsoft.ML.Legacy.Transforms.RowTakeFilter input) - { - var output = new Microsoft.ML.Legacy.Transforms.RowTakeFilter.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.RowTakeFilter input, Microsoft.ML.Legacy.Transforms.RowTakeFilter.Output output) - { - _jsonNodes.Add(Serialize("Transforms.RowTakeFilter", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.ScoreColumnSelector.Output Add(Microsoft.ML.Legacy.Transforms.ScoreColumnSelector input) - { - var output = new Microsoft.ML.Legacy.Transforms.ScoreColumnSelector.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.ScoreColumnSelector input, Microsoft.ML.Legacy.Transforms.ScoreColumnSelector.Output output) - { - _jsonNodes.Add(Serialize("Transforms.ScoreColumnSelector", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.Scorer.Output Add(Microsoft.ML.Legacy.Transforms.Scorer input) - { - var output = new Microsoft.ML.Legacy.Transforms.Scorer.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.Scorer input, Microsoft.ML.Legacy.Transforms.Scorer.Output output) - { - _jsonNodes.Add(Serialize("Transforms.Scorer", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.Segregator.Output Add(Microsoft.ML.Legacy.Transforms.Segregator input) - { - var output = new Microsoft.ML.Legacy.Transforms.Segregator.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.Segregator input, Microsoft.ML.Legacy.Transforms.Segregator.Output output) - { - _jsonNodes.Add(Serialize("Transforms.Segregator", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.SentimentAnalyzer.Output 
Add(Microsoft.ML.Legacy.Transforms.SentimentAnalyzer input) - { - var output = new Microsoft.ML.Legacy.Transforms.SentimentAnalyzer.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.SentimentAnalyzer input, Microsoft.ML.Legacy.Transforms.SentimentAnalyzer.Output output) - { - _jsonNodes.Add(Serialize("Transforms.SentimentAnalyzer", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.TensorFlowScorer.Output Add(Microsoft.ML.Legacy.Transforms.TensorFlowScorer input) - { - var output = new Microsoft.ML.Legacy.Transforms.TensorFlowScorer.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.TensorFlowScorer input, Microsoft.ML.Legacy.Transforms.TensorFlowScorer.Output output) - { - _jsonNodes.Add(Serialize("Transforms.TensorFlowScorer", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.TextFeaturizer.Output Add(Microsoft.ML.Legacy.Transforms.TextFeaturizer input) - { - var output = new Microsoft.ML.Legacy.Transforms.TextFeaturizer.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.TextFeaturizer input, Microsoft.ML.Legacy.Transforms.TextFeaturizer.Output output) - { - _jsonNodes.Add(Serialize("Transforms.TextFeaturizer", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.TextToKeyConverter.Output Add(Microsoft.ML.Legacy.Transforms.TextToKeyConverter input) - { - var output = new Microsoft.ML.Legacy.Transforms.TextToKeyConverter.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.TextToKeyConverter input, Microsoft.ML.Legacy.Transforms.TextToKeyConverter.Output output) - { - _jsonNodes.Add(Serialize("Transforms.TextToKeyConverter", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.TrainTestDatasetSplitter.Output Add(Microsoft.ML.Legacy.Transforms.TrainTestDatasetSplitter input) - { - var output = new Microsoft.ML.Legacy.Transforms.TrainTestDatasetSplitter.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.TrainTestDatasetSplitter input, Microsoft.ML.Legacy.Transforms.TrainTestDatasetSplitter.Output output) - { - _jsonNodes.Add(Serialize("Transforms.TrainTestDatasetSplitter", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.TreeLeafFeaturizer.Output Add(Microsoft.ML.Legacy.Transforms.TreeLeafFeaturizer input) - { - var output = new Microsoft.ML.Legacy.Transforms.TreeLeafFeaturizer.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.TreeLeafFeaturizer input, Microsoft.ML.Legacy.Transforms.TreeLeafFeaturizer.Output output) - { - _jsonNodes.Add(Serialize("Transforms.TreeLeafFeaturizer", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.TwoHeterogeneousModelCombiner.Output Add(Microsoft.ML.Legacy.Transforms.TwoHeterogeneousModelCombiner input) - { - var output = new Microsoft.ML.Legacy.Transforms.TwoHeterogeneousModelCombiner.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.TwoHeterogeneousModelCombiner input, Microsoft.ML.Legacy.Transforms.TwoHeterogeneousModelCombiner.Output output) - { - _jsonNodes.Add(Serialize("Transforms.TwoHeterogeneousModelCombiner", input, output)); - } - - [Obsolete] - 
public Microsoft.ML.Legacy.Transforms.VectorToImage.Output Add(Microsoft.ML.Legacy.Transforms.VectorToImage input) - { - var output = new Microsoft.ML.Legacy.Transforms.VectorToImage.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.VectorToImage input, Microsoft.ML.Legacy.Transforms.VectorToImage.Output output) - { - _jsonNodes.Add(Serialize("Transforms.VectorToImage", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.WordEmbeddings.Output Add(Microsoft.ML.Legacy.Transforms.WordEmbeddings input) - { - var output = new Microsoft.ML.Legacy.Transforms.WordEmbeddings.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.WordEmbeddings input, Microsoft.ML.Legacy.Transforms.WordEmbeddings.Output output) - { - _jsonNodes.Add(Serialize("Transforms.WordEmbeddings", input, output)); - } - - [Obsolete] - public Microsoft.ML.Legacy.Transforms.WordTokenizer.Output Add(Microsoft.ML.Legacy.Transforms.WordTokenizer input) - { - var output = new Microsoft.ML.Legacy.Transforms.WordTokenizer.Output(); - Add(input, output); - return output; - } - - [Obsolete] - public void Add(Microsoft.ML.Legacy.Transforms.WordTokenizer input, Microsoft.ML.Legacy.Transforms.WordTokenizer.Output output) - { - _jsonNodes.Add(Serialize("Transforms.WordTokenizer", input, output)); - } - - } - namespace Legacy.Data - { - - /// - /// Import a dataset from a text file - /// - [Obsolete] - public sealed partial class CustomTextLoader - { - - - /// - /// Location of the input file - /// - [Obsolete] - public Var InputFile { get; set; } = new Var(); - - /// - /// Custom schema to use for parsing - /// - [Obsolete] - public string CustomSchema { get; set; } - - - [Obsolete] - public sealed class Output - { - /// - /// The resulting data view - /// - public Var Data { get; set; } = new Var(); - - } - } - } - - namespace Legacy.Data - { - - /// - /// Pass dataview from memory to experiment - /// - [Obsolete] - public sealed partial class DataViewReference - { - - - /// - /// Pointer to IDataView in memory - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output - { - /// - /// The resulting data view - /// - public Var Data { get; set; } = new Var(); - - } - } - } - - namespace Legacy.Data - { - - /// - /// Create an array variable of IDataView - /// - [Obsolete] - public sealed partial class IDataViewArrayConverter - { - - - /// - /// The data sets - /// - [Obsolete] - public ArrayVar Data { get; set; } = new ArrayVar(); - - - [Obsolete] - public sealed class Output - { - /// - /// The data set array - /// - public ArrayVar OutputData { get; set; } = new ArrayVar(); - - } - } - } - - namespace Legacy.Data - { - - /// - /// Create an array variable of PredictorModel - /// - [Obsolete] - public sealed partial class PredictorModelArrayConverter - { - - - /// - /// The models - /// - [Obsolete] - public ArrayVar Model { get; set; } = new ArrayVar(); - - - [Obsolete] - public sealed class Output - { - /// - /// The model array - /// - public ArrayVar OutputModel { get; set; } = new ArrayVar(); - - } - } - } - - namespace Legacy.Data - { - - [Obsolete] - public enum DataKind : byte - { - I1 = 1, - U1 = 2, - I2 = 3, - U2 = 4, - I4 = 5, - U4 = 6, - I8 = 7, - U8 = 8, - R4 = 9, - Num = 9, - R8 = 10, - TX = 11, - Text = 11, - TXT = 11, - BL = 12, - Bool = 12, - TimeSpan = 13, - TS = 13, - DT = 14, - DateTime = 14, - DZ = 15, - DateTimeZone = 
15, - UG = 16, - U16 = 16 - } - - [Obsolete] - public sealed partial class TextLoaderRange - { - /// - /// First index in the range - /// - [Obsolete] - public int Min { get; set; } - - /// - /// Last index in the range - /// - [Obsolete] - public int? Max { get; set; } - - /// - /// This range extends to the end of the line, but should be a fixed number of items - /// - [Obsolete] - public bool AutoEnd { get; set; } = false; - - /// - /// This range extends to the end of the line, which can vary from line to line - /// - [Obsolete] - public bool VariableEnd { get; set; } = false; - - /// - /// This range includes only other indices not specified - /// - [Obsolete] - public bool AllOther { get; set; } = false; - - /// - /// Force scalar columns to be treated as vectors of length one - /// - [Obsolete] - public bool ForceVector { get; set; } = false; - - } - - [Obsolete] - public sealed partial class KeyRange - { - /// - /// First index in the range - /// - [Obsolete] - public ulong Min { get; set; } = 0; - - /// - /// Last index in the range - /// - [Obsolete] - public ulong? Max { get; set; } - - /// - /// Whether the key is contiguous - /// - [Obsolete] - public bool Contiguous { get; set; } = true; - - } - - [Obsolete] - public sealed partial class TextLoaderColumn - { - /// - /// Name of the column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Type of the items in the column - /// - [Obsolete] - public DataKind? Type { get; set; } - - /// - /// Source index range(s) of the column - /// - [Obsolete] - public TextLoaderRange[] Source { get; set; } - - /// - /// For a key column, this defines the range of values - /// - [Obsolete] - public KeyRange KeyRange { get; set; } - - } - - [Obsolete] - public sealed partial class TextLoaderArguments - { - /// - /// Use separate parsing threads? - /// - [Obsolete] - public bool UseThreads { get; set; } = true; - - /// - /// File containing a header with feature names. If specified, header defined in the data file (header+) is ignored. - /// - [Obsolete] - public string HeaderFile { get; set; } - - /// - /// Maximum number of rows to produce - /// - [Obsolete] - public long? MaxRows { get; set; } - - /// - /// Whether the input may include quoted values, which can contain separator characters, colons, and distinguish empty values from missing values. When true, consecutive separators denote a missing value and an empty value is denoted by "". When false, consecutive separators denote an empty value. - /// - [Obsolete] - public bool AllowQuoting { get; set; } = true; - - /// - /// Whether the input may include sparse representations - /// - [Obsolete] - public bool AllowSparse { get; set; } = true; - - /// - /// Number of source columns in the text data. Default is that sparse rows contain their size information. - /// - [Obsolete] - public int? InputSize { get; set; } - - /// - /// Source column separator. - /// - [Obsolete] - public char[] Separator { get; set; } = { '\t' }; - - /// - /// Column groups. Each group is specified as name:type:numeric-ranges, eg, col=Features:R4:1-17,26,35-40 - /// - [Obsolete] - public TextLoaderColumn[] Column { get; set; } - - /// - /// Remove trailing whitespace from lines - /// - [Obsolete] - public bool TrimWhitespace { get; set; } = false; - - /// - /// Data file has header with feature names. Header is read only if options 'hs' and 'hf' are not specified. 
- /// - [Obsolete] - public bool HasHeader { get; set; } = false; - - } - - /// - /// Import a dataset from a text file - /// - [Obsolete] - public sealed partial class TextLoader : Microsoft.ML.Legacy.ILearningPipelineLoader - { - - [Obsolete] - [JsonIgnore] - private string _inputFilePath = null; - public TextLoader(string filePath) - { - _inputFilePath = filePath; - } - - [Obsolete] - public void SetInput(IHostEnvironment env, Experiment experiment) - { - IFileHandle inputFile = new SimpleFileHandle(env, _inputFilePath, false, false); - experiment.SetInput(InputFile, inputFile); - } - - [Obsolete] - public Var GetInputData() => null; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - Contracts.Assert(previousStep == null); - - return new TextLoaderPipelineStep(experiment.Add(this)); - } - - [Obsolete] - private class TextLoaderPipelineStep : ILearningPipelineDataStep - { - public TextLoaderPipelineStep (Output output) - { - Data = output.Data; - Model = null; - } - - public Var Data { get; } - public Var Model { get; } - } - - /// - /// Location of the input file - /// - [Obsolete] - public Var InputFile { get; set; } = new Var(); - - /// - /// Arguments - /// - [Obsolete] - public TextLoaderArguments Arguments { get; set; } = new TextLoaderArguments(); - - - [Obsolete] - public sealed class Output - { - /// - /// The resulting data view - /// - public Var Data { get; set; } = new Var(); - - } - } - } - - namespace Legacy.Models - { - - /// - /// Evaluates an anomaly detection scored dataset. - /// - [Obsolete] - public sealed partial class AnomalyDetectionEvaluator : Microsoft.ML.EntryPoints.CommonInputs.IEvaluatorInput - { - - - /// - /// Expected number of false positives - /// - [Obsolete] - public int K { get; set; } = 10; - - /// - /// Expected false positive rate - /// - [Obsolete] - public double P { get; set; } = 0.01d; - - /// - /// Number of top-scored predictions to display - /// - [Obsolete] - public int NumTopResults { get; set; } = 50; - - /// - /// Whether to calculate metrics in one pass - /// - [Obsolete] - public bool Stream { get; set; } = true; - - /// - /// The number of samples to use for AUC calculation. If 0, AUC is not computed. If -1, the whole dataset is used - /// - [Obsolete] - public int MaxAucExamples { get; set; } = -1; - - /// - /// Column to use for labels. - /// - [Obsolete] - public string LabelColumn { get; set; } - - /// - /// Weight column name. - /// - [Obsolete] - public string WeightColumn { get; set; } - - /// - /// Score column name. - /// - [Obsolete] - public string ScoreColumn { get; set; } - - /// - /// Stratification column name. - /// - [Obsolete] - public string[] StratColumn { get; set; } - - /// - /// The data to be used for evaluation. - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - /// - /// Name column name. 
- /// - [Obsolete] - public string NameColumn { get; set; } = "Name"; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IEvaluatorOutput - { - /// - /// Warning dataset - /// - public Var Warnings { get; set; } = new Var(); - - /// - /// Overall metrics dataset - /// - public Var OverallMetrics { get; set; } = new Var(); - - /// - /// Per instance metrics dataset - /// - public Var PerInstanceMetrics { get; set; } = new Var(); - - } - } - } - - namespace Legacy.Models - { - [Obsolete] - public enum EnsembleCreatorScoreCombiner - { - Median = 0, - Average = 1 - } - - - /// - /// Combine anomaly detection models into an ensemble - /// - [Obsolete] - public sealed partial class AnomalyPipelineEnsemble - { - - - /// - /// The combiner used to combine the scores - /// - [Obsolete] - public EnsembleCreatorScoreCombiner ModelCombiner { get; set; } = EnsembleCreatorScoreCombiner.Average; - - /// - /// The models to combine into an ensemble - /// - [Obsolete] - public ArrayVar Models { get; set; } = new ArrayVar(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IAnomalyDetectionOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - } - } - - namespace Legacy.Models - { - - /// - /// Evaluates a binary classification scored dataset. - /// - [Obsolete] - public sealed partial class BinaryClassificationEvaluator : Microsoft.ML.EntryPoints.CommonInputs.IEvaluatorInput - { - - - /// - /// Probability column name - /// - [Obsolete] - public string ProbabilityColumn { get; set; } - - /// - /// Probability value for classification thresholding - /// - [Obsolete] - public float Threshold { get; set; } - - /// - /// Use raw score value instead of probability for classification thresholding - /// - [Obsolete] - public bool UseRawScoreThreshold { get; set; } = true; - - /// - /// The number of samples to use for p/r curve generation. Specify 0 for no p/r curve generation - /// - [Obsolete] - public int NumRocExamples { get; set; } = 100000; - - /// - /// The number of samples to use for AUC calculation. If 0, AUC is not computed. If -1, the whole dataset is used - /// - [Obsolete] - public int MaxAucExamples { get; set; } = -1; - - /// - /// The number of samples to use for AUPRC calculation. Specify 0 for no AUPRC calculation - /// - [Obsolete] - public int NumAuPrcExamples { get; set; } = 100000; - - /// - /// Column to use for labels. - /// - [Obsolete] - public string LabelColumn { get; set; } - - /// - /// Weight column name. - /// - [Obsolete] - public string WeightColumn { get; set; } - - /// - /// Score column name. - /// - [Obsolete] - public string ScoreColumn { get; set; } - - /// - /// Stratification column name. - /// - [Obsolete] - public string[] StratColumn { get; set; } - - /// - /// The data to be used for evaluation. - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - /// - /// Name column name. 
- /// - [Obsolete] - public string NameColumn { get; set; } = "Name"; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IClassificationEvaluatorOutput, Microsoft.ML.EntryPoints.CommonOutputs.IEvaluatorOutput - { - /// - /// Confusion matrix dataset - /// - public Var ConfusionMatrix { get; set; } = new Var(); - - /// - /// Warning dataset - /// - public Var Warnings { get; set; } = new Var(); - - /// - /// Overall metrics dataset - /// - public Var OverallMetrics { get; set; } = new Var(); - - /// - /// Per instance metrics dataset - /// - public Var PerInstanceMetrics { get; set; } = new Var(); - - } - } - } - - namespace Legacy.Models - { - [Obsolete] - public enum EnsembleCreatorClassifierCombiner - { - Median = 0, - Average = 1, - Vote = 2 - } - - - /// - /// Combine binary classifiers into an ensemble - /// - [Obsolete] - public sealed partial class BinaryEnsemble - { - - - /// - /// The combiner used to combine the scores - /// - [Obsolete] - public EnsembleCreatorClassifierCombiner ModelCombiner { get; set; } = EnsembleCreatorClassifierCombiner.Median; - - /// - /// The models to combine into an ensemble - /// - [Obsolete] - public ArrayVar Models { get; set; } = new ArrayVar(); - - /// - /// Whether to validate that all the pipelines are identical - /// - [Obsolete] - public bool ValidatePipelines { get; set; } = true; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IBinaryClassificationOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - } - } - - namespace Legacy.Models - { - - /// - /// Combine binary classification models into an ensemble - /// - [Obsolete] - public sealed partial class BinaryPipelineEnsemble - { - - - /// - /// The combiner used to combine the scores - /// - [Obsolete] - public EnsembleCreatorClassifierCombiner ModelCombiner { get; set; } = EnsembleCreatorClassifierCombiner.Median; - - /// - /// The models to combine into an ensemble - /// - [Obsolete] - public ArrayVar Models { get; set; } = new ArrayVar(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IBinaryClassificationOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - } - } - - namespace Legacy.Models - { - - /// - /// Evaluates a multi class classification scored dataset. - /// - [Obsolete] - public sealed partial class ClassificationEvaluator : Microsoft.ML.EntryPoints.CommonInputs.IEvaluatorInput - { - - - /// - /// Output top-K accuracy. - /// - [Obsolete] - public int? OutputTopKAcc { get; set; } - - /// - /// Output top-K classes. - /// - [Obsolete] - public int NumTopClassesToOutput { get; set; } = 3; - - /// - /// Maximum number of classes in confusion matrix. - /// - [Obsolete] - public int NumClassesConfusionMatrix { get; set; } = 10; - - /// - /// Output per class statistics and confusion matrix. - /// - [Obsolete] - public bool OutputPerClassStatistics { get; set; } = false; - - /// - /// Column to use for labels. - /// - [Obsolete] - public string LabelColumn { get; set; } - - /// - /// Weight column name. - /// - [Obsolete] - public string WeightColumn { get; set; } - - /// - /// Score column name. - /// - [Obsolete] - public string ScoreColumn { get; set; } - - /// - /// Stratification column name. 
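
The `BinaryClassificationEvaluator` entry point and the binary ensemble wrappers above have a counterpart in the evaluation call on the binary-classification catalog. A minimal sketch follows; `predictions` and the column names are placeholders, and metric names follow the stable API rather than the 0.9-era surface.

```csharp
using System;
using Microsoft.ML;
using Microsoft.ML.Data;

public static class BinaryEvaluationExample
{
    // `predictions` is assumed to be the scored output of a trained binary classifier.
    public static void Report(MLContext mlContext, IDataView predictions)
    {
        CalibratedBinaryClassificationMetrics metrics =
            mlContext.BinaryClassification.Evaluate(
                predictions,
                labelColumnName: "Label",
                scoreColumnName: "Score",
                probabilityColumnName: "Probability");

        Console.WriteLine($"AUC:      {metrics.AreaUnderRocCurve:0.###}");
        Console.WriteLine($"Accuracy: {metrics.Accuracy:0.###}");
        Console.WriteLine($"F1:       {metrics.F1Score:0.###}");
    }
}
```
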
- /// - [Obsolete] - public string[] StratColumn { get; set; } - - /// - /// The data to be used for evaluation. - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - /// - /// Name column name. - /// - [Obsolete] - public string NameColumn { get; set; } = "Name"; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IClassificationEvaluatorOutput, Microsoft.ML.EntryPoints.CommonOutputs.IEvaluatorOutput - { - /// - /// Confusion matrix dataset - /// - public Var ConfusionMatrix { get; set; } = new Var(); - - /// - /// Warning dataset - /// - public Var Warnings { get; set; } = new Var(); - - /// - /// Overall metrics dataset - /// - public Var OverallMetrics { get; set; } = new Var(); - - /// - /// Per instance metrics dataset - /// - public Var PerInstanceMetrics { get; set; } = new Var(); - - } - } - } - - namespace Legacy.Models - { - - /// - /// Evaluates a clustering scored dataset. - /// - [Obsolete] - public sealed partial class ClusterEvaluator : Microsoft.ML.EntryPoints.CommonInputs.IEvaluatorInput - { - - - /// - /// Features column name - /// - [Obsolete] - public string FeatureColumn { get; set; } - - /// - /// Calculate DBI? (time-consuming unsupervised metric) - /// - [Obsolete] - public bool CalculateDbi { get; set; } = false; - - /// - /// Output top K clusters - /// - [Obsolete] - public int NumTopClustersToOutput { get; set; } = 3; - - /// - /// Column to use for labels. - /// - [Obsolete] - public string LabelColumn { get; set; } - - /// - /// Weight column name. - /// - [Obsolete] - public string WeightColumn { get; set; } - - /// - /// Score column name. - /// - [Obsolete] - public string ScoreColumn { get; set; } - - /// - /// Stratification column name. - /// - [Obsolete] - public string[] StratColumn { get; set; } - - /// - /// The data to be used for evaluation. - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - /// - /// Name column name. - /// - [Obsolete] - public string NameColumn { get; set; } = "Name"; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IEvaluatorOutput - { - /// - /// Warning dataset - /// - public Var Warnings { get; set; } = new Var(); - - /// - /// Overall metrics dataset - /// - public Var OverallMetrics { get; set; } = new Var(); - - /// - /// Per instance metrics dataset - /// - public Var PerInstanceMetrics { get; set; } = new Var(); - - } - } - } - - namespace Legacy.Models - { - [Obsolete] - public enum MacroUtilsTrainerKinds - { - SignatureBinaryClassifierTrainer = 0, - SignatureMultiClassClassifierTrainer = 1, - SignatureRankerTrainer = 2, - SignatureRegressorTrainer = 3, - SignatureMultiOutputRegressorTrainer = 4, - SignatureAnomalyDetectorTrainer = 5, - SignatureClusteringTrainer = 6 - } - - - /// - /// Combine the metric data views returned from cross validation. 
- /// - [Obsolete] - public sealed partial class CrossValidationResultsCombiner - { - - - /// - /// Overall metrics datasets - /// - [Obsolete] - public ArrayVar OverallMetrics { get; set; } = new ArrayVar(); - - /// - /// Per instance metrics datasets - /// - [Obsolete] - public ArrayVar PerInstanceMetrics { get; set; } = new ArrayVar(); - - /// - /// Confusion matrix datasets - /// - [Obsolete] - public ArrayVar ConfusionMatrix { get; set; } = new ArrayVar(); - - /// - /// Warning datasets - /// - [Obsolete] - public ArrayVar Warnings { get; set; } = new ArrayVar(); - - /// - /// The label column name - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// Column to use for example weight - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional WeightColumn { get; set; } - - /// - /// Column to use for grouping - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional GroupColumn { get; set; } - - /// - /// Name column name - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional NameColumn { get; set; } - - /// - /// Specifies the trainer kind, which determines the evaluator to be used. - /// - [Obsolete] - public MacroUtilsTrainerKinds Kind { get; set; } = MacroUtilsTrainerKinds.SignatureBinaryClassifierTrainer; - - - [Obsolete] - public sealed class Output - { - /// - /// Warning dataset - /// - public Var Warnings { get; set; } = new Var(); - - /// - /// Overall metrics dataset - /// - public Var OverallMetrics { get; set; } = new Var(); - - /// - /// Per instance metrics dataset - /// - public Var PerInstanceMetrics { get; set; } = new Var(); - - /// - /// Confusion matrix dataset - /// - public Var ConfusionMatrix { get; set; } = new Var(); - - } - } - } - - namespace Legacy.Models - { - - [Obsolete] - public sealed partial class CrossValidationMacroSubGraphInput - { - /// - /// The data to be used for training - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - } - - [Obsolete] - public sealed partial class CrossValidationMacroSubGraphOutput - { - /// - /// The predictor model - /// - [Obsolete] - public Var PredictorModel { get; set; } = new Var(); - - } - - /// - /// Cross validation for general learning - /// - [Obsolete] - public sealed partial class CrossValidator - { - - - /// - /// The data set - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - /// - /// The transform model from the pipeline before this command. It gets included in the Output.PredictorModel. - /// - [Obsolete] - public Var TransformModel { get; set; } = new Var(); - - /// - /// The training subgraph - /// - [Obsolete] - public Experiment Nodes { get; set; } - - /// - /// The training subgraph inputs - /// - [Obsolete] - public CrossValidationMacroSubGraphInput Inputs { get; set; } = new CrossValidationMacroSubGraphInput(); - - /// - /// The training subgraph outputs - /// - [Obsolete] - public CrossValidationMacroSubGraphOutput Outputs { get; set; } = new CrossValidationMacroSubGraphOutput(); - - /// - /// Column to use for stratification - /// - [Obsolete] - public string StratificationColumn { get; set; } - - /// - /// Number of folds in k-fold cross-validation - /// - [Obsolete] - public int NumFolds { get; set; } = 2; - - /// - /// Specifies the trainer kind, which determines the evaluator to be used. 
- /// - [Obsolete] - public MacroUtilsTrainerKinds Kind { get; set; } = MacroUtilsTrainerKinds.SignatureBinaryClassifierTrainer; - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// Column to use for example weight - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional WeightColumn { get; set; } - - /// - /// Column to use for grouping - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional GroupColumn { get; set; } - - /// - /// Name column name - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional NameColumn { get; set; } - - - [Obsolete] - public sealed class Output - { - /// - /// The final model including the trained predictor model and the model from the transforms, provided as the Input.TransformModel. - /// - public ArrayVar PredictorModel { get; set; } = new ArrayVar(); - - /// - /// Warning dataset - /// - public Var Warnings { get; set; } = new Var(); - - /// - /// Overall metrics dataset - /// - public Var OverallMetrics { get; set; } = new Var(); - - /// - /// Per instance metrics dataset - /// - public Var PerInstanceMetrics { get; set; } = new Var(); - - /// - /// Confusion matrix dataset - /// - public Var ConfusionMatrix { get; set; } = new Var(); - - } - } - } - - namespace Legacy.Models - { - - /// - /// Split the dataset into the specified number of cross-validation folds (train and test sets) - /// - [Obsolete] - public sealed partial class CrossValidatorDatasetSplitter - { - - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - /// - /// Number of folds to split into - /// - [Obsolete] - public int NumFolds { get; set; } = 2; - - /// - /// Stratification column - /// - [Obsolete] - public string StratificationColumn { get; set; } - - - [Obsolete] - public sealed class Output - { - /// - /// Training data (one dataset per fold) - /// - public ArrayVar TrainData { get; set; } = new ArrayVar(); - - /// - /// Testing data (one dataset per fold) - /// - public ArrayVar TestData { get; set; } = new ArrayVar(); - - } - } - } - - namespace Legacy.Models - { - - /// - /// Applies a TransformModel to a dataset. 
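
The `CrossValidator` and `CrossValidatorDatasetSplitter` macros deleted above map onto methods exposed directly on the task and data catalogs. A minimal sketch, assuming a binary-classification `pipeline` estimator; method names follow the stable API and the exact 0.9-era names may differ.

```csharp
using System;
using System.Linq;
using Microsoft.ML;

public static class CrossValidationExample
{
    // `pipeline` is a placeholder for any estimator chain ending in a binary trainer.
    public static void Run(MLContext mlContext, IDataView data, IEstimator<ITransformer> pipeline)
    {
        // Replaces the CrossValidatorDatasetSplitter + CrossValidator macro pair.
        var folds = mlContext.BinaryClassification.CrossValidate(
            data, pipeline, numberOfFolds: 5, labelColumnName: "Label");

        Console.WriteLine($"Mean AUC over {folds.Count} folds: " +
            $"{folds.Average(f => f.Metrics.AreaUnderRocCurve):0.###}");

        // A plain train/test split is available on the data catalog.
        var split = mlContext.Data.TrainTestSplit(data, testFraction: 0.2);
        IDataView trainSet = split.TrainSet;
        IDataView testSet = split.TestSet;
    }
}
```
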
- /// - [Obsolete] - public sealed partial class DatasetTransformer : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Transform model - /// - [Obsolete] - public Var TransformModel { get; set; } = new Var(); - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(DatasetTransformer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new DatasetTransformerPipelineStep(output); - } - - [Obsolete] - private class DatasetTransformerPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public DatasetTransformerPipelineStep(Output output) - { - Data = output.OutputData; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Models - { - - /// - /// Summarize a pipeline ensemble predictor. - /// - [Obsolete] - public sealed partial class EnsembleSummary - { - - - /// - /// The predictor to summarize - /// - [Obsolete] - public Var PredictorModel { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output - { - /// - /// The summaries of the individual predictors - /// - public ArrayVar Summaries { get; set; } = new ArrayVar(); - - /// - /// The model statistics of the individual predictors - /// - public ArrayVar Stats { get; set; } = new ArrayVar(); - - } - } - } - - namespace Legacy.Models - { - - /// - /// Apply a Platt calibrator with a fixed slope and offset to an input model - /// - [Obsolete] - public sealed partial class FixedPlattCalibrator : Microsoft.ML.EntryPoints.CommonInputs.ICalibratorInput, Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// The slope parameter of the calibration function 1 / (1 + exp(-slope * x + offset) - /// - [Obsolete] - public double Slope { get; set; } = 1d; - - /// - /// The offset parameter of the calibration function 1 / (1 + exp(-slope * x + offset) - /// - [Obsolete] - public double Offset { get; set; } - - /// - /// The predictor to calibrate - /// - [Obsolete] - public Var UncalibratedPredictorModel { get; set; } = new Var(); - - /// - /// The maximum number of examples to train the calibrator on - /// - [TlcModule.Range(Inf = 0, Max = 2147483647)] - [Obsolete] - public int MaxRows { get; set; } = 1000000000; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ICalibratorOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new 
InvalidOperationException($"{ nameof(FixedPlattCalibrator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new FixedPlattCalibratorPipelineStep(output); - } - - [Obsolete] - private class FixedPlattCalibratorPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public FixedPlattCalibratorPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Models - { - - /// - /// Combine multiclass classifiers into an ensemble - /// - [Obsolete] - public sealed partial class MultiClassPipelineEnsemble - { - - - /// - /// The combiner used to combine the scores - /// - [Obsolete] - public EnsembleCreatorClassifierCombiner ModelCombiner { get; set; } = EnsembleCreatorClassifierCombiner.Median; - - /// - /// The models to combine into an ensemble - /// - [Obsolete] - public ArrayVar Models { get; set; } = new ArrayVar(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IMulticlassClassificationOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - } - } - - namespace Legacy.Models - { - - /// - /// Evaluates a multi output regression scored dataset. - /// - [Obsolete] - public sealed partial class MultiOutputRegressionEvaluator : Microsoft.ML.EntryPoints.CommonInputs.IEvaluatorInput - { - - - /// - /// Loss function - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public RegressionLossFunction LossFunction { get; set; } = new SquaredLossRegressionLossFunction(); - - /// - /// Supress labels and scores in per-instance outputs? - /// - [Obsolete] - public bool SupressScoresAndLabels { get; set; } = false; - - /// - /// Column to use for labels. - /// - [Obsolete] - public string LabelColumn { get; set; } - - /// - /// Weight column name. - /// - [Obsolete] - public string WeightColumn { get; set; } - - /// - /// Score column name. - /// - [Obsolete] - public string ScoreColumn { get; set; } - - /// - /// Stratification column name. - /// - [Obsolete] - public string[] StratColumn { get; set; } - - /// - /// The data to be used for evaluation. - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - /// - /// Name column name. 
- /// - [Obsolete] - public string NameColumn { get; set; } = "Name"; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IEvaluatorOutput - { - /// - /// Warning dataset - /// - public Var Warnings { get; set; } = new Var(); - - /// - /// Overall metrics dataset - /// - public Var OverallMetrics { get; set; } = new Var(); - - /// - /// Per instance metrics dataset - /// - public Var PerInstanceMetrics { get; set; } = new Var(); - - } - } - } - - namespace Legacy.Models - { - - /// - /// Apply a Naive calibrator to an input model - /// - [Obsolete] - public sealed partial class NaiveCalibrator : Microsoft.ML.EntryPoints.CommonInputs.ICalibratorInput, Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// The predictor to calibrate - /// - [Obsolete] - public Var UncalibratedPredictorModel { get; set; } = new Var(); - - /// - /// The maximum number of examples to train the calibrator on - /// - [TlcModule.Range(Inf = 0, Max = 2147483647)] - [Obsolete] - public int MaxRows { get; set; } = 1000000000; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ICalibratorOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(NaiveCalibrator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new NaiveCalibratorPipelineStep(output); - } - - [Obsolete] - private class NaiveCalibratorPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public NaiveCalibratorPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Models - { - [Obsolete] - public enum NormalizeOption - { - No = 0, - Warn = 1, - Auto = 2, - Yes = 3 - } - - [Obsolete] - public enum CachingOptions - { - Auto = 0, - Memory = 1, - Disk = 2, - None = 3 - } - - - [Obsolete] - public sealed partial class OneVersusAllMacroSubGraphOutput - { - /// - /// The predictor model for the subgraph exemplar. - /// - [Obsolete] - public Var Model { get; set; } = new Var(); - - } - - /// - [Obsolete] - public sealed partial class OneVersusAll : Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// The subgraph for the binary trainer used to construct the OVA learner. This should be a TrainBinary node. - /// - [Obsolete] - public Experiment Nodes { get; set; } - - /// - /// The training subgraph output. 
- /// - [Obsolete] - public OneVersusAllMacroSubGraphOutput OutputForSubGraph { get; set; } = new OneVersusAllMacroSubGraphOutput(); - - /// - /// Use probabilities in OVA combiner - /// - [Obsolete] - public bool UseProbabilities { get; set; } = true; - - /// - /// Column to use for example weight - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional WeightColumn { get; set; } - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public NormalizeOption NormalizeFeatures { get; set; } = NormalizeOption.Auto; - - /// - /// Whether learner should cache input training data - /// - [Obsolete] - public CachingOptions Caching { get; set; } = CachingOptions.Auto; - - - [Obsolete] - public sealed class Output - { - /// - /// The trained multiclass model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(OneVersusAll)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new OneVersusAllPipelineStep(output); - } - - [Obsolete] - private class OneVersusAllPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public OneVersusAllPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Models - { - [Obsolete] - public enum OnnxVersion - { - Stable = 0, - Experimental = 1 - } - - - /// - /// Converts the model to ONNX format. - /// - [Obsolete] - public sealed partial class OnnxConverter - { - - - /// - /// The path to write the output ONNX to. - /// - [Obsolete] - public string Onnx { get; set; } - - /// - /// The path to write the output JSON to. - /// - [Obsolete] - public string Json { get; set; } - - /// - /// The 'name' property in the output ONNX. By default this will be the ONNX extension-less name. - /// - [Obsolete] - public string Name { get; set; } - - /// - /// The 'domain' property in the output ONNX. - /// - [Obsolete] - public string Domain { get; set; } - - /// - /// Array of input column names to drop - /// - [Obsolete] - public string[] InputsToDrop { get; set; } - - /// - /// Array of output column names to drop - /// - [Obsolete] - public string[] OutputsToDrop { get; set; } - - /// - /// Model that needs to be converted to ONNX format. - /// - [Obsolete] - public Var Model { get; set; } = new Var(); - - /// - /// The targeted ONNX version. It can be either "Stable" or "Experimental". If "Experimental" is used, produced model can contain components that is not officially supported in ONNX standard. 
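
The `OneVersusAll` macro above corresponds to the one-versus-all meta-trainer on the multiclass catalog. A minimal sketch, assuming averaged perceptron as the underlying binary learner; names follow the stable API and may differ from the 0.9-era surface.

```csharp
using Microsoft.ML;

public static class OvaExample
{
    public static ITransformer Train(MLContext mlContext, IDataView trainingData)
    {
        // Wraps a binary trainer into a multiclass one-versus-all learner,
        // mirroring the legacy macro's UseProbabilities behaviour.
        var ova = mlContext.MulticlassClassification.Trainers.OneVersusAll(
            mlContext.BinaryClassification.Trainers.AveragedPerceptron(
                labelColumnName: "Label", featureColumnName: "Features"),
            labelColumnName: "Label",
            useProbabilities: true);

        return ova.Fit(trainingData);
    }
}
```
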
- /// - [Obsolete] - public OnnxVersion OnnxVersion { get; set; } = OnnxVersion.Stable; - - /// - /// The data file - /// - [Obsolete] - public string DataFile { get; set; } - - - [Obsolete] - public sealed class Output - { - } - } - } - - namespace Legacy.Models - { - - /// - /// Combines a sequence of PredictorModels into a single model - /// - [Obsolete] - public sealed partial class OvaModelCombiner : Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Input models - /// - [Obsolete] - public ArrayVar ModelArray { get; set; } = new ArrayVar(); - - /// - /// Use probabilities from learners instead of raw values. - /// - [Obsolete] - public bool UseProbabilities { get; set; } = true; - - /// - /// Column to use for example weight - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional WeightColumn { get; set; } - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public NormalizeOption NormalizeFeatures { get; set; } = NormalizeOption.Auto; - - /// - /// Whether learner should cache input training data - /// - [Obsolete] - public CachingOptions Caching { get; set; } = CachingOptions.Auto; - - - [Obsolete] - public sealed class Output - { - /// - /// Predictor model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(OvaModelCombiner)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new OvaModelCombinerPipelineStep(output); - } - - [Obsolete] - private class OvaModelCombinerPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public OvaModelCombinerPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Models - { - - /// - /// Apply a PAV calibrator to an input model - /// - [Obsolete] - public sealed partial class PAVCalibrator : Microsoft.ML.EntryPoints.CommonInputs.ICalibratorInput, Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// The predictor to calibrate - /// - [Obsolete] - public Var UncalibratedPredictorModel { get; set; } = new Var(); - - /// - /// The maximum number of examples to train the calibrator on - /// - [TlcModule.Range(Inf = 0, Max = 2147483647)] - [Obsolete] - public int MaxRows { get; set; } = 1000000000; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ICalibratorOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var 
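
The `OnnxConverter` entry point above has a rough counterpart in the `ConvertToOnnx` extension shipped with the Microsoft.ML.OnnxConverter package. This is a hedged sketch: the trained model, sample data view, and output path are placeholders, and availability in the 0.9-era packages may differ.

```csharp
using System.IO;
using Microsoft.ML;

public static class OnnxExportExample
{
    // `model` is a trained ITransformer; `inputData` is a data view with the input schema.
    public static void Export(MLContext mlContext, ITransformer model, IDataView inputData)
    {
        // Requires the Microsoft.ML.OnnxConverter package.
        using (var stream = File.Create("model.onnx"))   // hypothetical output path
        {
            mlContext.Model.ConvertToOnnx(model, inputData, stream);
        }
    }
}
```
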
PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(PAVCalibrator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new PAVCalibratorPipelineStep(output); - } - - [Obsolete] - private class PAVCalibratorPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public PAVCalibratorPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Models - { - - /// - /// Apply a Platt calibrator to an input model - /// - [Obsolete] - public sealed partial class PlattCalibrator : Microsoft.ML.EntryPoints.CommonInputs.ICalibratorInput, Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// The predictor to calibrate - /// - [Obsolete] - public Var UncalibratedPredictorModel { get; set; } = new Var(); - - /// - /// The maximum number of examples to train the calibrator on - /// - [TlcModule.Range(Inf = 0, Max = 2147483647)] - [Obsolete] - public int MaxRows { get; set; } = 1000000000; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ICalibratorOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(PlattCalibrator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new PlattCalibratorPipelineStep(output); - } - - [Obsolete] - private class PlattCalibratorPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public PlattCalibratorPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Models - { - - /// - /// Evaluates a quantile regression scored dataset. - /// - [Obsolete] - public sealed partial class QuantileRegressionEvaluator : Microsoft.ML.EntryPoints.CommonInputs.IEvaluatorInput - { - - - /// - /// Loss function - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public RegressionLossFunction LossFunction { get; set; } = new SquaredLossRegressionLossFunction(); - - /// - /// Quantile index to select - /// - [Obsolete] - public int? Index { get; set; } - - /// - /// Column to use for labels. - /// - [Obsolete] - public string LabelColumn { get; set; } - - /// - /// Weight column name. - /// - [Obsolete] - public string WeightColumn { get; set; } - - /// - /// Score column name. - /// - [Obsolete] - public string ScoreColumn { get; set; } - - /// - /// Stratification column name. 
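
The four legacy calibrator entry points in this stretch (fixed Platt, naive, PAV, and Platt) map to calibrator estimators on the binary-classification catalog. A minimal sketch, appending a calibrator after an uncalibrated trainer; names follow the stable API, where `Isotonic` is the PAV equivalent, and the 0.9-era names may differ.

```csharp
using Microsoft.ML;

public static class CalibratorExample
{
    public static ITransformer TrainCalibrated(MLContext mlContext, IDataView trainingData)
    {
        // An uncalibrated binary trainer followed by a Platt calibrator;
        // Naive() and Isotonic() (the PAV equivalent) can be swapped in the same way.
        var pipeline = mlContext.BinaryClassification.Trainers
            .AveragedPerceptron(labelColumnName: "Label", featureColumnName: "Features")
            .Append(mlContext.BinaryClassification.Calibrators.Platt(labelColumnName: "Label"));

        return pipeline.Fit(trainingData);
    }
}
```
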
- /// - [Obsolete] - public string[] StratColumn { get; set; } - - /// - /// The data to be used for evaluation. - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - /// - /// Name column name. - /// - [Obsolete] - public string NameColumn { get; set; } = "Name"; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IEvaluatorOutput - { - /// - /// Warning dataset - /// - public Var Warnings { get; set; } = new Var(); - - /// - /// Overall metrics dataset - /// - public Var OverallMetrics { get; set; } = new Var(); - - /// - /// Per instance metrics dataset - /// - public Var PerInstanceMetrics { get; set; } = new Var(); - - } - } - } - - namespace Legacy.Models - { - - /// - /// Evaluates a ranking scored dataset. - /// - [Obsolete] - public sealed partial class RankerEvaluator : Microsoft.ML.EntryPoints.CommonInputs.IEvaluatorInput - { - - - /// - /// Column to use for the group ID - /// - [Obsolete] - public string GroupIdColumn { get; set; } - - /// - /// Maximum truncation level for computing (N)DCG - /// - [Obsolete] - public int DcgTruncationLevel { get; set; } = 3; - - /// - /// Label relevance gains - /// - [Obsolete] - public string LabelGains { get; set; } = "0,3,7,15,31"; - - /// - /// Column to use for labels. - /// - [Obsolete] - public string LabelColumn { get; set; } - - /// - /// Weight column name. - /// - [Obsolete] - public string WeightColumn { get; set; } - - /// - /// Score column name. - /// - [Obsolete] - public string ScoreColumn { get; set; } - - /// - /// Stratification column name. - /// - [Obsolete] - public string[] StratColumn { get; set; } - - /// - /// The data to be used for evaluation. - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - /// - /// Name column name. - /// - [Obsolete] - public string NameColumn { get; set; } = "Name"; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IEvaluatorOutput - { - /// - /// Warning dataset - /// - public Var Warnings { get; set; } = new Var(); - - /// - /// Overall metrics dataset - /// - public Var OverallMetrics { get; set; } = new Var(); - - /// - /// Per instance metrics dataset - /// - public Var PerInstanceMetrics { get; set; } = new Var(); - - } - } - } - - namespace Legacy.Models - { - - /// - /// Combine regression models into an ensemble - /// - [Obsolete] - public sealed partial class RegressionEnsemble - { - - - /// - /// The combiner used to combine the scores - /// - [Obsolete] - public EnsembleCreatorScoreCombiner ModelCombiner { get; set; } = EnsembleCreatorScoreCombiner.Median; - - /// - /// The models to combine into an ensemble - /// - [Obsolete] - public ArrayVar Models { get; set; } = new ArrayVar(); - - /// - /// Whether to validate that all the pipelines are identical - /// - [Obsolete] - public bool ValidatePipelines { get; set; } = true; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IRegressionOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - } - } - - namespace Legacy.Models - { - - /// - /// Evaluates a regression scored dataset. 
- /// - [Obsolete] - public sealed partial class RegressionEvaluator : Microsoft.ML.EntryPoints.CommonInputs.IEvaluatorInput - { - - - /// - /// Loss function - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public RegressionLossFunction LossFunction { get; set; } = new SquaredLossRegressionLossFunction(); - - /// - /// Column to use for labels. - /// - [Obsolete] - public string LabelColumn { get; set; } - - /// - /// Weight column name. - /// - [Obsolete] - public string WeightColumn { get; set; } - - /// - /// Score column name. - /// - [Obsolete] - public string ScoreColumn { get; set; } - - /// - /// Stratification column name. - /// - [Obsolete] - public string[] StratColumn { get; set; } - - /// - /// The data to be used for evaluation. - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - /// - /// Name column name. - /// - [Obsolete] - public string NameColumn { get; set; } = "Name"; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IEvaluatorOutput - { - /// - /// Warning dataset - /// - public Var Warnings { get; set; } = new Var(); - - /// - /// Overall metrics dataset - /// - public Var OverallMetrics { get; set; } = new Var(); - - /// - /// Per instance metrics dataset - /// - public Var PerInstanceMetrics { get; set; } = new Var(); - - } - } - } - - namespace Legacy.Models - { - - /// - /// Combine regression models into an ensemble - /// - [Obsolete] - public sealed partial class RegressionPipelineEnsemble - { - - - /// - /// The combiner used to combine the scores - /// - [Obsolete] - public EnsembleCreatorScoreCombiner ModelCombiner { get; set; } = EnsembleCreatorScoreCombiner.Median; - - /// - /// The models to combine into an ensemble - /// - [Obsolete] - public ArrayVar Models { get; set; } = new ArrayVar(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IRegressionOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - } - } - - namespace Legacy.Models - { - - /// - /// Summarize a linear regression predictor. - /// - [Obsolete] - public sealed partial class Summarizer - { - - - /// - /// The predictor to summarize - /// - [Obsolete] - public Var PredictorModel { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output - { - /// - /// The summary of a predictor - /// - public Var Summary { get; set; } = new Var(); - - /// - /// The training set statistics. Note that this output can be null. 
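
Alongside the quantile-regression, ranking, and regression evaluator entry points removed in this region, the regression case looks like this on the catalog API; `predictions` and the column names are placeholders, and metric names follow the stable API.

```csharp
using System;
using Microsoft.ML;
using Microsoft.ML.Data;

public static class RegressionEvaluationExample
{
    // `predictions` is assumed to be the scored output of a trained regression model.
    public static void Report(MLContext mlContext, IDataView predictions)
    {
        RegressionMetrics metrics = mlContext.Regression.Evaluate(
            predictions, labelColumnName: "Label", scoreColumnName: "Score");

        Console.WriteLine($"RMSE: {metrics.RootMeanSquaredError:0.###}");
        Console.WriteLine($"R^2:  {metrics.RSquared:0.###}");
    }
}
```
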
- /// - public Var Stats { get; set; } = new Var(); - - } - } - } - - namespace Legacy.Models - { - - [Obsolete] - public sealed partial class TrainTestMacroSubGraphInput - { - /// - /// The data to be used for training - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - } - - [Obsolete] - public sealed partial class TrainTestMacroSubGraphOutput - { - /// - /// The predictor model - /// - [Obsolete] - public Var PredictorModel { get; set; } = new Var(); - - } - - /// - /// General train test for any supported evaluator - /// - [Obsolete] - public sealed partial class TrainTestEvaluator - { - - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// The data to be used for testing - /// - [Obsolete] - public Var TestingData { get; set; } = new Var(); - - /// - /// The aggregated transform model from the pipeline before this command, to apply to the test data, and also include in the final model, together with the predictor model. - /// - [Obsolete] - public Var TransformModel { get; set; } = new Var(); - - /// - /// The training subgraph - /// - [Obsolete] - public Experiment Nodes { get; set; } - - /// - /// The training subgraph inputs - /// - [Obsolete] - public TrainTestMacroSubGraphInput Inputs { get; set; } = new TrainTestMacroSubGraphInput(); - - /// - /// The training subgraph outputs - /// - [Obsolete] - public TrainTestMacroSubGraphOutput Outputs { get; set; } = new TrainTestMacroSubGraphOutput(); - - /// - /// Specifies the trainer kind, which determines the evaluator to be used. - /// - [Obsolete] - public MacroUtilsTrainerKinds Kind { get; set; } = MacroUtilsTrainerKinds.SignatureBinaryClassifierTrainer; - - /// - /// Identifies which pipeline was run for this train test. - /// - [Obsolete] - public string PipelineId { get; set; } - - /// - /// Indicates whether to include and output training dataset metrics. - /// - [Obsolete] - public bool IncludeTrainingMetrics { get; set; } = false; - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// Column to use for example weight - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional WeightColumn { get; set; } - - /// - /// Column to use for grouping - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional GroupColumn { get; set; } - - /// - /// Name column name - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional NameColumn { get; set; } - - - [Obsolete] - public sealed class Output - { - /// - /// The final model including the trained predictor model and the model from the transforms, provided as the Input.TransformModel. 
- /// - public Var PredictorModel { get; set; } = new Var(); - - /// - /// Warning dataset - /// - public Var Warnings { get; set; } = new Var(); - - /// - /// Overall metrics dataset - /// - public Var OverallMetrics { get; set; } = new Var(); - - /// - /// Per instance metrics dataset - /// - public Var PerInstanceMetrics { get; set; } = new Var(); - - /// - /// Confusion matrix dataset - /// - public Var ConfusionMatrix { get; set; } = new Var(); - - /// - /// Warning dataset for training - /// - public Var TrainingWarnings { get; set; } = new Var(); - - /// - /// Overall metrics dataset for training - /// - public Var TrainingOverallMetrics { get; set; } = new Var(); - - /// - /// Per instance metrics dataset for training - /// - public Var TrainingPerInstanceMetrics { get; set; } = new Var(); - - /// - /// Confusion matrix dataset for training - /// - public Var TrainingConfusionMatrix { get; set; } = new Var(); - - } - } - } - - namespace Legacy.TimeSeriesProcessingEntryPoints - { - - /// - /// Applies a Exponential average on a time series. - /// - [Obsolete] - public sealed partial class ExponentialAverage : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// The name of the source column - /// - [Obsolete] - public string Source { get; set; } - - /// - /// The name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Coefficient d in: d m(y_t) = d * y_t + (1-d) * m(y_(t-1)), it should be in [0, 1]. - /// - [Obsolete] - public float Decay { get; set; } = 0.9f; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(ExponentialAverage)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new ExponentialAveragePipelineStep(output); - } - - [Obsolete] - private class ExponentialAveragePipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public ExponentialAveragePipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.TimeSeriesProcessingEntryPoints - { - [Obsolete] - public enum SequentialAnomalyDetectionTransformBaseSingleIidAnomalyDetectionBaseStateMartingaleType : byte - { - None = 0, - Power = 1, - Mixture = 2 - } - - - /// - /// This transform detects the change-points in an i.i.d. sequence using adaptive kernel density estimation and martingales. - /// - [Obsolete] - public sealed partial class IidChangePointDetector : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// The name of the source column. - /// - [Obsolete] - public string Source { get; set; } - - /// - /// The name of the new column. 
- /// - [Obsolete] - public string Name { get; set; } - - /// - /// The length of the sliding window on p-values for computing the martingale score. - /// - [Obsolete] - public int ChangeHistoryLength { get; set; } = 20; - - /// - /// The confidence for change point detection in the range [0, 100]. - /// - [Obsolete] - public double Confidence { get; set; } = 95d; - - /// - /// The martingale used for scoring. - /// - [Obsolete] - public SequentialAnomalyDetectionTransformBaseSingleIidAnomalyDetectionBaseStateMartingaleType Martingale { get; set; } = SequentialAnomalyDetectionTransformBaseSingleIidAnomalyDetectionBaseStateMartingaleType.Power; - - /// - /// The epsilon parameter for the Power martingale. - /// - [Obsolete] - public double PowerMartingaleEpsilon { get; set; } = 0.1d; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(IidChangePointDetector)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new IidChangePointDetectorPipelineStep(output); - } - - [Obsolete] - private class IidChangePointDetectorPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public IidChangePointDetectorPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.TimeSeriesProcessingEntryPoints - { - [Obsolete] - public enum SequentialAnomalyDetectionTransformBaseSingleIidAnomalyDetectionBaseStateAnomalySide : byte - { - Positive = 0, - Negative = 1, - TwoSided = 2 - } - - - /// - /// This transform detects the spikes in a i.i.d. sequence using adaptive kernel density estimation. - /// - [Obsolete] - public sealed partial class IidSpikeDetector : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// The name of the source column. - /// - [Obsolete] - public string Source { get; set; } - - /// - /// The name of the new column. - /// - [Obsolete] - public string Name { get; set; } - - /// - /// The argument that determines whether to detect positive or negative anomalies, or both. - /// - [Obsolete] - public SequentialAnomalyDetectionTransformBaseSingleIidAnomalyDetectionBaseStateAnomalySide Side { get; set; } = SequentialAnomalyDetectionTransformBaseSingleIidAnomalyDetectionBaseStateAnomalySide.TwoSided; - - /// - /// The size of the sliding window for computing the p-value. - /// - [Obsolete] - public int PvalueHistoryLength { get; set; } = 100; - - /// - /// The confidence for spike detection in the range [0, 100]. 
- /// - [Obsolete] - public double Confidence { get; set; } = 99d; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(IidSpikeDetector)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new IidSpikeDetectorPipelineStep(output); - } - - [Obsolete] - private class IidSpikeDetectorPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public IidSpikeDetectorPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.TimeSeriesProcessingEntryPoints - { - - /// - /// Detects the values of time-series that are in the top percentile of the sliding window. - /// - [Obsolete] - public sealed partial class PercentileThresholdTransform : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// The name of the source column - /// - [Obsolete] - public string Source { get; set; } - - /// - /// The name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// The percentile value for thresholding in the range [0, 100] - /// - [Obsolete] - public double Percentile { get; set; } = 1d; - - /// - /// The size of the sliding window for computing the percentile threshold. The default value is set to 1. 
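
The time-series entry points being removed here (`IidSpikeDetector` and related detectors) have estimator counterparts exposed through the transforms catalog by the Microsoft.ML.TimeSeries package; change-point detection (`DetectIidChangePoint`, `DetectChangePointBySsa`) follows the same pattern. A minimal sketch for IID spike detection; the input record type and column names are placeholders, and method names follow the stable API, which may differ from the 0.9-era surface.

```csharp
using Microsoft.ML;

// Hypothetical single-column time-series input.
public class SeriesPoint
{
    public float Value { get; set; }
}

public static class SpikeDetectionExample
{
    public static IDataView Detect(MLContext mlContext, IDataView series)
    {
        // Requires the Microsoft.ML.TimeSeries package. The output column is a
        // vector of {alert, raw score, p-value} per row.
        var estimator = mlContext.Transforms.DetectIidSpike(
            outputColumnName: "Prediction",
            inputColumnName: nameof(SeriesPoint.Value),
            confidence: 99,
            pvalueHistoryLength: 100);

        return estimator.Fit(series).Transform(series);
    }
}
```
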
- /// - [Obsolete] - public int WindowSize { get; set; } = 1; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(PercentileThresholdTransform)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new PercentileThresholdTransformPipelineStep(output); - } - - [Obsolete] - private class PercentileThresholdTransformPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public PercentileThresholdTransformPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.TimeSeriesProcessingEntryPoints - { - - /// - /// This P-Value transform calculates the p-value of the current input in the sequence with regard to the values in the sliding window. - /// - [Obsolete] - public sealed partial class PValueTransform : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// The name of the source column - /// - [Obsolete] - public string Source { get; set; } - - /// - /// The name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// The seed value of the random generator - /// - [Obsolete] - public int Seed { get; set; } - - /// - /// The flag that determines whether the p-values are calculated on the positive side - /// - [Obsolete] - public bool PositiveSide { get; set; } = true; - - /// - /// The size of the sliding window for computing the p-value - /// - [Obsolete] - public int WindowSize { get; set; } = 1; - - /// - /// The size of the initial window for computing the p-value. The default value is set to 0, which means there is no initial window considered. 
- /// - [Obsolete] - public int InitialWindowSize { get; set; } - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(PValueTransform)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new PValueTransformPipelineStep(output); - } - - [Obsolete] - private class PValueTransformPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public PValueTransformPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.TimeSeriesProcessingEntryPoints - { - [Obsolete] - public enum SlidingWindowTransformBaseSingleBeginOptions : byte - { - NaNValues = 0, - FirstValue = 1 - } - - - /// - /// Returns the last values for a time series [y(t-d-l+1), y(t-d-l+2), ..., y(t-l-1), y(t-l)] where d is the size of the window, l the lag and y is a Float. - /// - [Obsolete] - public sealed partial class SlidingWindowTransform : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// The name of the source column - /// - [Obsolete] - public string Source { get; set; } - - /// - /// The name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// The size of the sliding window for computing the moving average - /// - [Obsolete] - public int WindowSize { get; set; } = 2; - - /// - /// Lag between current observation and last observation from the sliding window - /// - [Obsolete] - public int Lag { get; set; } = 1; - - /// - /// Define how to populate the first rows of the produced series - /// - [Obsolete] - public SlidingWindowTransformBaseSingleBeginOptions Begin { get; set; } = SlidingWindowTransformBaseSingleBeginOptions.NaNValues; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(SlidingWindowTransform)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new SlidingWindowTransformPipelineStep(output); - } - - [Obsolete] - private class SlidingWindowTransformPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - 
public SlidingWindowTransformPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.TimeSeriesProcessingEntryPoints - { - [Obsolete] - public enum ErrorFunctionUtilsErrorFunction : byte - { - SignedDifference = 0, - AbsoluteDifference = 1, - SignedProportion = 2, - AbsoluteProportion = 3, - SquaredDifference = 4 - } - - [Obsolete] - public enum SequentialAnomalyDetectionTransformBaseSingleSsaAnomalyDetectionBaseStateMartingaleType : byte - { - None = 0, - Power = 1, - Mixture = 2 - } - - - /// - /// This transform detects the change-points in a seasonal time-series using Singular Spectrum Analysis (SSA). - /// - [Obsolete] - public sealed partial class SsaChangePointDetector : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// The name of the source column. - /// - [Obsolete] - public string Source { get; set; } - - /// - /// The name of the new column. - /// - [Obsolete] - public string Name { get; set; } - - /// - /// The length of the sliding window on p-values for computing the martingale score. - /// - [Obsolete] - public int ChangeHistoryLength { get; set; } = 20; - - /// - /// The number of points from the beginning of the sequence used for training. - /// - [Obsolete] - public int TrainingWindowSize { get; set; } = 100; - - /// - /// The confidence for change point detection in the range [0, 100]. - /// - [Obsolete] - public double Confidence { get; set; } = 95d; - - /// - /// An upper bound on the largest relevant seasonality in the input time-series. - /// - [Obsolete] - public int SeasonalWindowSize { get; set; } = 10; - - /// - /// The function used to compute the error between the expected and the observed value. - /// - [Obsolete] - public ErrorFunctionUtilsErrorFunction ErrorFunction { get; set; } = ErrorFunctionUtilsErrorFunction.SignedDifference; - - /// - /// The martingale used for scoring. - /// - [Obsolete] - public SequentialAnomalyDetectionTransformBaseSingleSsaAnomalyDetectionBaseStateMartingaleType Martingale { get; set; } = SequentialAnomalyDetectionTransformBaseSingleSsaAnomalyDetectionBaseStateMartingaleType.Power; - - /// - /// The epsilon parameter for the Power martingale. 
- /// - [Obsolete] - public double PowerMartingaleEpsilon { get; set; } = 0.1d; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(SsaChangePointDetector)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new SsaChangePointDetectorPipelineStep(output); - } - - [Obsolete] - private class SsaChangePointDetectorPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public SsaChangePointDetectorPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.TimeSeriesProcessingEntryPoints - { - [Obsolete] - public enum SequentialAnomalyDetectionTransformBaseSingleSsaAnomalyDetectionBaseStateAnomalySide : byte - { - Positive = 0, - Negative = 1, - TwoSided = 2 - } - - - /// - /// This transform detects the spikes in a seasonal time-series using Singular Spectrum Analysis (SSA). - /// - [Obsolete] - public sealed partial class SsaSpikeDetector : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// The name of the source column. - /// - [Obsolete] - public string Source { get; set; } - - /// - /// The name of the new column. - /// - [Obsolete] - public string Name { get; set; } - - /// - /// The argument that determines whether to detect positive or negative anomalies, or both. - /// - [Obsolete] - public SequentialAnomalyDetectionTransformBaseSingleSsaAnomalyDetectionBaseStateAnomalySide Side { get; set; } = SequentialAnomalyDetectionTransformBaseSingleSsaAnomalyDetectionBaseStateAnomalySide.TwoSided; - - /// - /// The size of the sliding window for computing the p-value. - /// - [Obsolete] - public int PvalueHistoryLength { get; set; } = 100; - - /// - /// The number of points from the beginning of the sequence used for training. - /// - [Obsolete] - public int TrainingWindowSize { get; set; } = 100; - - /// - /// The confidence for spike detection in the range [0, 100]. - /// - [Obsolete] - public double Confidence { get; set; } = 99d; - - /// - /// An upper bound on the largest relevant seasonality in the input time-series. - /// - [Obsolete] - public int SeasonalWindowSize { get; set; } = 10; - - /// - /// The function used to compute the error between the expected and the observed value. 
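Reviewer note for anyone migrating off the deleted entry points: the SSA change-point detector survives in the estimator-based API. A minimal sketch, assuming the Microsoft.ML.TimeSeries package and the `DetectChangePointBySsa` catalog method name and parameter names used by later ML.NET releases (the 0.9-era equivalents may differ slightly):

```
using System.Collections.Generic;
using Microsoft.ML;
using Microsoft.ML.Data;

public class TimePoint
{
    public float Value { get; set; }
}

public class ChangePointPrediction
{
    // Alert, raw score, p-value and martingale score emitted by the detector.
    [VectorType(4)]
    public double[] Prediction { get; set; }
}

public static class SsaChangePointSketch
{
    public static IDataView Detect(MLContext mlContext, IEnumerable<TimePoint> series)
    {
        IDataView data = mlContext.Data.LoadFromEnumerable(series);

        // Values chosen to mirror the removed SsaChangePointDetector defaults:
        // confidence 95, change history 20, training window 100, seasonal window 10.
        var pipeline = mlContext.Transforms.DetectChangePointBySsa(
            outputColumnName: nameof(ChangePointPrediction.Prediction),
            inputColumnName: nameof(TimePoint.Value),
            confidence: 95,
            changeHistoryLength: 20,
            trainingWindowSize: 100,
            seasonalityWindowSize: 10);

        return pipeline.Fit(data).Transform(data);
    }
}
```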
- /// - [Obsolete] - public ErrorFunctionUtilsErrorFunction ErrorFunction { get; set; } = ErrorFunctionUtilsErrorFunction.SignedDifference; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(SsaSpikeDetector)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new SsaSpikeDetectorPipelineStep(output); - } - - [Obsolete] - private class SsaSpikeDetectorPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public SsaSpikeDetectorPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Trainers - { - - /// - /// - [Obsolete] - public sealed partial class AveragedPerceptronBinaryClassifier : Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Loss Function - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public ClassificationLossFunction LossFunction { get; set; } = new HingeLossClassificationLossFunction(); - - /// - /// The calibrator kind to apply to the predictor. Specify null for no calibration - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public CalibratorTrainer Calibrator { get; set; } = new PlattCalibratorCalibratorTrainer(); - - /// - /// The maximum number of examples to use when training the calibrator - /// - [Obsolete] - public int MaxCalibrationExamples { get; set; } = 1000000; - - /// - /// Learning rate - /// - [TlcModule.SweepableDiscreteParamAttribute("LearningRate", new object[]{0.01f, 0.1f, 0.5f, 1f})] - [Obsolete] - public float LearningRate { get; set; } = 1f; - - /// - /// Decrease learning rate - /// - [TlcModule.SweepableDiscreteParamAttribute("DecreaseLearningRate", new object[]{false, true})] - [Obsolete] - public bool DecreaseLearningRate { get; set; } = false; - - /// - /// Number of examples after which weights will be reset to the current average - /// - [Obsolete] - public long? ResetWeightsAfterXExamples { get; set; } - - /// - /// Instead of updating averaged weights on every example, only update when loss is nonzero - /// - [Obsolete] - public bool DoLazyUpdates { get; set; } = true; - - /// - /// L2 Regularization Weight - /// - [TlcModule.SweepableFloatParamAttribute("L2RegularizerWeight", 0f, 0.4f)] - [Obsolete] - public float L2RegularizerWeight { get; set; } - - /// - /// Extra weight given to more recent updates - /// - [Obsolete] - public float RecencyGain { get; set; } - - /// - /// Whether Recency Gain is multiplicative (vs. additive) - /// - [Obsolete] - public bool RecencyGainMulti { get; set; } = false; - - /// - /// Do averaging? 
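Similarly, a spike-detection sketch for the SsaSpikeDetector removed above, reusing the `mlContext` and `TimePoint` type from the change-point sketch; the `DetectSpikeBySsa` method name and its `pvalueHistoryLength` parameter are assumptions based on later releases:

```
// Mirrors the removed SsaSpikeDetector defaults: confidence 99,
// p-value history 100, training window 100, seasonal window 10.
// The output vector carries alert, raw score and p-value.
var spikePipeline = mlContext.Transforms.DetectSpikeBySsa(
    outputColumnName: "Prediction",
    inputColumnName: nameof(TimePoint.Value),
    confidence: 99,
    pvalueHistoryLength: 100,
    trainingWindowSize: 100,
    seasonalityWindowSize: 10);
```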
- /// - [Obsolete] - public bool Averaged { get; set; } = true; - - /// - /// The inexactness tolerance for averaging - /// - [Obsolete] - public float AveragedTolerance { get; set; } = 0.01f; - - /// - /// Number of iterations - /// - [TlcModule.SweepableLongParamAttribute("NumIterations", 1, 100, stepSize:10, isLogScale:true)] - [Obsolete] - public int NumIterations { get; set; } = 1; - - /// - /// Initial Weights and bias, comma-separated - /// - [Obsolete] - public string InitialWeights { get; set; } - - /// - /// Init weights diameter - /// - [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0f, 1f, numSteps:5)] - [Obsolete] - public float InitWtsDiameter { get; set; } - - /// - /// Whether to shuffle for each training iteration - /// - [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[]{false, true})] - [Obsolete] - public bool Shuffle { get; set; } = true; - - /// - /// Size of cache when trained in Scope - /// - [Obsolete] - public int StreamingCacheSize { get; set; } = 1000000; - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IBinaryClassificationOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(AveragedPerceptronBinaryClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new AveragedPerceptronBinaryClassifierPipelineStep(output); - } - - [Obsolete] - private class AveragedPerceptronBinaryClassifierPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public AveragedPerceptronBinaryClassifierPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Trainers - { - - /// - /// Train binary ensemble. 
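The AveragedPerceptronBinaryClassifier entry point removed above maps onto the MLContext trainer catalog. A minimal sketch, assuming the `AveragedPerceptron` catalog method and the parameter names of later ML.NET releases:

```
using Microsoft.ML;

public static class AveragedPerceptronSketch
{
    public static ITransformer Train(MLContext mlContext, IDataView trainingData)
    {
        // "Label" and "Features" match the defaults of the removed entry point.
        var trainer = mlContext.BinaryClassification.Trainers.AveragedPerceptron(
            labelColumnName: "Label",
            featureColumnName: "Features",
            numberOfIterations: 10);

        return trainer.Fit(trainingData);
    }
}
```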
- /// - [Obsolete] - public sealed partial class EnsembleBinaryClassifier : Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Algorithm to prune the base learners for selective Ensemble - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public EnsembleBinarySubModelSelector SubModelSelectorType { get; set; } = new AllSelectorEnsembleBinarySubModelSelector(); - - /// - /// Output combiner - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public EnsembleBinaryOutputCombiner OutputCombiner { get; set; } = new MedianEnsembleBinaryOutputCombiner(); - - /// - /// Number of models per batch. If not specified, will default to 50 if there is only one base predictor, or the number of base predictors otherwise. - /// - [Obsolete] - public int? NumModels { get; set; } - - /// - /// Batch size - /// - [Obsolete] - public int BatchSize { get; set; } = -1; - - /// - /// Sampling Type - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public EnsembleSubsetSelector SamplingType { get; set; } = new BootstrapSelectorEnsembleSubsetSelector(); - - /// - /// All the base learners will run asynchronously if the value is true - /// - [Obsolete] - public bool TrainParallel { get; set; } = false; - - /// - /// True, if metrics for each model need to be evaluated and shown in comparison table. This is done by using validation set if available or the training set - /// - [Obsolete] - public bool ShowMetrics { get; set; } = false; - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IBinaryClassificationOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(EnsembleBinaryClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new EnsembleBinaryClassifierPipelineStep(output); - } - - [Obsolete] - private class EnsembleBinaryClassifierPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public EnsembleBinaryClassifierPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Trainers - { - - /// - /// Train multiclass ensemble. 
- /// - [Obsolete] - public sealed partial class EnsembleClassification : Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Algorithm to prune the base learners for selective Ensemble - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public EnsembleMulticlassSubModelSelector SubModelSelectorType { get; set; } = new AllSelectorMultiClassEnsembleMulticlassSubModelSelector(); - - /// - /// Output combiner - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public EnsembleMulticlassOutputCombiner OutputCombiner { get; set; } = new MultiMedianEnsembleMulticlassOutputCombiner(); - - /// - /// Number of models per batch. If not specified, will default to 50 if there is only one base predictor, or the number of base predictors otherwise. - /// - [Obsolete] - public int? NumModels { get; set; } - - /// - /// Batch size - /// - [Obsolete] - public int BatchSize { get; set; } = -1; - - /// - /// Sampling Type - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public EnsembleSubsetSelector SamplingType { get; set; } = new BootstrapSelectorEnsembleSubsetSelector(); - - /// - /// All the base learners will run asynchronously if the value is true - /// - [Obsolete] - public bool TrainParallel { get; set; } = false; - - /// - /// True, if metrics for each model need to be evaluated and shown in comparison table. This is done by using validation set if available or the training set - /// - [Obsolete] - public bool ShowMetrics { get; set; } = false; - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IMulticlassClassificationOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(EnsembleClassification)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new EnsembleClassificationPipelineStep(output); - } - - [Obsolete] - private class EnsembleClassificationPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public EnsembleClassificationPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Trainers - { - - /// - /// 
Train regression ensemble. - /// - [Obsolete] - public sealed partial class EnsembleRegression : Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Algorithm to prune the base learners for selective Ensemble - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public EnsembleRegressionSubModelSelector SubModelSelectorType { get; set; } = new AllSelectorEnsembleRegressionSubModelSelector(); - - /// - /// Output combiner - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public EnsembleRegressionOutputCombiner OutputCombiner { get; set; } = new MedianEnsembleRegressionOutputCombiner(); - - /// - /// Number of models per batch. If not specified, will default to 50 if there is only one base predictor, or the number of base predictors otherwise. - /// - [Obsolete] - public int? NumModels { get; set; } - - /// - /// Batch size - /// - [Obsolete] - public int BatchSize { get; set; } = -1; - - /// - /// Sampling Type - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public EnsembleSubsetSelector SamplingType { get; set; } = new BootstrapSelectorEnsembleSubsetSelector(); - - /// - /// All the base learners will run asynchronously if the value is true - /// - [Obsolete] - public bool TrainParallel { get; set; } = false; - - /// - /// True, if metrics for each model need to be evaluated and shown in comparison table. This is done by using validation set if available or the training set - /// - [Obsolete] - public bool ShowMetrics { get; set; } = false; - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IRegressionOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(EnsembleRegression)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new EnsembleRegressionPipelineStep(output); - } - - [Obsolete] - private class EnsembleRegressionPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public EnsembleRegressionPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Trainers - { - [Obsolete] - public enum Bundle : byte 
- { - None = 0, - AggregateLowPopulation = 1, - Adjacent = 2 - } - - - /// - /// - [Obsolete] - public sealed partial class FastForestBinaryClassifier : Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithGroupId, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Upper bound on absolute value of single tree output - /// - [Obsolete] - public double MaxTreeOutput { get; set; } = 100d; - - /// - /// The calibrator kind to apply to the predictor. Specify null for no calibration - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public CalibratorTrainer Calibrator { get; set; } = new PlattCalibratorCalibratorTrainer(); - - /// - /// The maximum number of examples to use when training the calibrator - /// - [Obsolete] - public int MaxCalibrationExamples { get; set; } = 1000000; - - /// - /// Number of labels to be sampled from each leaf to make the distribtuion - /// - [Obsolete] - public int QuantileSampleCount { get; set; } = 100; - - /// - /// Allows to choose Parallel FastTree Learning Algorithm - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public ParallelTraining ParallelTrainer { get; set; } = new SingleParallelTraining(); - - /// - /// The number of threads to use - /// - [Obsolete] - public int? NumThreads { get; set; } - - /// - /// The seed of the random number generator - /// - [Obsolete] - public int RngSeed { get; set; } = 123; - - /// - /// The seed of the active feature selection - /// - [Obsolete] - public int FeatureSelectSeed { get; set; } = 123; - - /// - /// The entropy (regularization) coefficient between 0 and 1 - /// - [Obsolete] - public double EntropyCoefficient { get; set; } - - /// - /// The number of histograms in the pool (between 2 and numLeaves) - /// - [Obsolete] - public int HistogramPoolSize { get; set; } = -1; - - /// - /// Whether to utilize the disk or the data's native transposition facilities (where applicable) when performing the transpose - /// - [Obsolete] - public bool? DiskTranspose { get; set; } - - /// - /// Whether to collectivize features during dataset preparation to speed up training - /// - [Obsolete] - public bool FeatureFlocks { get; set; } = true; - - /// - /// Whether to do split based on multiple categorical feature values. - /// - [Obsolete] - public bool CategoricalSplit { get; set; } = false; - - /// - /// Maximum categorical split groups to consider when splitting on a categorical feature. Split groups are a collection of split points. This is used to reduce overfitting when there many categorical features. - /// - [Obsolete] - public int MaxCategoricalGroupsPerNode { get; set; } = 64; - - /// - /// Maximum categorical split points to consider when splitting on a categorical feature. - /// - [Obsolete] - public int MaxCategoricalSplitPoints { get; set; } = 64; - - /// - /// Minimum categorical docs percentage in a bin to consider for a split. - /// - [Obsolete] - public double MinDocsPercentageForCategoricalSplit { get; set; } = 0.001d; - - /// - /// Minimum categorical doc count in a bin to consider for a split. - /// - [Obsolete] - public int MinDocsForCategoricalSplit { get; set; } = 100; - - /// - /// Bias for calculating gradient for each feature bin for a categorical feature. - /// - [Obsolete] - public double Bias { get; set; } - - /// - /// Bundle low population bins. 
Bundle.None(0): no bundling, Bundle.AggregateLowPopulation(1): Bundle low population, Bundle.Adjacent(2): Neighbor low population bundle. - /// - [Obsolete] - public Bundle Bundling { get; set; } = Bundle.None; - - /// - /// Maximum number of distinct values (bins) per feature - /// - [Obsolete] - public int MaxBins { get; set; } = 255; - - /// - /// Sparsity level needed to use sparse feature representation - /// - [Obsolete] - public double SparsifyThreshold { get; set; } = 0.7d; - - /// - /// The feature first use penalty coefficient - /// - [Obsolete] - public double FeatureFirstUsePenalty { get; set; } - - /// - /// The feature re-use penalty (regularization) coefficient - /// - [Obsolete] - public double FeatureReusePenalty { get; set; } - - /// - /// Tree fitting gain confidence requirement (should be in the range [0,1) ). - /// - [Obsolete] - public double GainConfidenceLevel { get; set; } - - /// - /// The temperature of the randomized softmax distribution for choosing the feature - /// - [Obsolete] - public double SoftmaxTemperature { get; set; } - - /// - /// Print execution time breakdown to stdout - /// - [Obsolete] - public bool ExecutionTimes { get; set; } = false; - - /// - /// The max number of leaves in each regression tree - /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] - [Obsolete] - public int NumLeaves { get; set; } = 20; - - /// - /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data - /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[]{1, 10, 50})] - [Obsolete] - public int MinDocumentsInLeafs { get; set; } = 10; - - /// - /// Total number of decision trees to create in the ensemble - /// - [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[]{20, 100, 500})] - [Obsolete] - public int NumTrees { get; set; } = 100; - - /// - /// The fraction of features (chosen randomly) to use on each iteration - /// - [Obsolete] - public double FeatureFraction { get; set; } = 0.7d; - - /// - /// Number of trees in each bag (0 for disabling bagging) - /// - [Obsolete] - public int BaggingSize { get; set; } = 1; - - /// - /// Percentage of training examples used in each bag - /// - [Obsolete] - public double BaggingTrainFraction { get; set; } = 0.7d; - - /// - /// The fraction of features (chosen randomly) to use on each split - /// - [Obsolete] - public double SplitFraction { get; set; } = 0.7d; - - /// - /// Smoothing paramter for tree regularization - /// - [Obsolete] - public double Smoothing { get; set; } - - /// - /// When a root split is impossible, allow training to proceed - /// - [Obsolete] - public bool AllowEmptyTrees { get; set; } = true; - - /// - /// The level of feature compression to use - /// - [Obsolete] - public int FeatureCompressionLevel { get; set; } = 1; - - /// - /// Compress the tree Ensemble - /// - [Obsolete] - public bool CompressEnsemble { get; set; } = false; - - /// - /// Maximum Number of trees after compression - /// - [Obsolete] - public int MaxTreesAfterCompression { get; set; } = -1; - - /// - /// Print metrics graph for the first test set - /// - [Obsolete] - public bool PrintTestGraph { get; set; } = false; - - /// - /// Print Train and Validation metrics in graph - /// - [Obsolete] - public bool PrintTrainValidGraph { get; set; } = false; - - /// - /// Calculate metric values for train/valid/test every k rounds - /// - [Obsolete] - public int TestFrequency { get; set; } = 2147483647; - - /// - /// 
Column to use for example groupId - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional GroupIdColumn { get; set; } - - /// - /// Column to use for example weight - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional WeightColumn { get; set; } - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IBinaryClassificationOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(FastForestBinaryClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new FastForestBinaryClassifierPipelineStep(output); - } - - [Obsolete] - private class FastForestBinaryClassifierPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public FastForestBinaryClassifierPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Trainers - { - - /// - /// - [Obsolete] - public sealed partial class FastForestRegressor : Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithGroupId, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Shuffle the labels on every iteration. Useful probably only if using this tree as a tree leaf featurizer for multiclass. - /// - [Obsolete] - public bool ShuffleLabels { get; set; } = false; - - /// - /// Number of labels to be sampled from each leaf to make the distribtuion - /// - [Obsolete] - public int QuantileSampleCount { get; set; } = 100; - - /// - /// Allows to choose Parallel FastTree Learning Algorithm - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public ParallelTraining ParallelTrainer { get; set; } = new SingleParallelTraining(); - - /// - /// The number of threads to use - /// - [Obsolete] - public int? 
NumThreads { get; set; } - - /// - /// The seed of the random number generator - /// - [Obsolete] - public int RngSeed { get; set; } = 123; - - /// - /// The seed of the active feature selection - /// - [Obsolete] - public int FeatureSelectSeed { get; set; } = 123; - - /// - /// The entropy (regularization) coefficient between 0 and 1 - /// - [Obsolete] - public double EntropyCoefficient { get; set; } - - /// - /// The number of histograms in the pool (between 2 and numLeaves) - /// - [Obsolete] - public int HistogramPoolSize { get; set; } = -1; - - /// - /// Whether to utilize the disk or the data's native transposition facilities (where applicable) when performing the transpose - /// - [Obsolete] - public bool? DiskTranspose { get; set; } - - /// - /// Whether to collectivize features during dataset preparation to speed up training - /// - [Obsolete] - public bool FeatureFlocks { get; set; } = true; - - /// - /// Whether to do split based on multiple categorical feature values. - /// - [Obsolete] - public bool CategoricalSplit { get; set; } = false; - - /// - /// Maximum categorical split groups to consider when splitting on a categorical feature. Split groups are a collection of split points. This is used to reduce overfitting when there many categorical features. - /// - [Obsolete] - public int MaxCategoricalGroupsPerNode { get; set; } = 64; - - /// - /// Maximum categorical split points to consider when splitting on a categorical feature. - /// - [Obsolete] - public int MaxCategoricalSplitPoints { get; set; } = 64; - - /// - /// Minimum categorical docs percentage in a bin to consider for a split. - /// - [Obsolete] - public double MinDocsPercentageForCategoricalSplit { get; set; } = 0.001d; - - /// - /// Minimum categorical doc count in a bin to consider for a split. - /// - [Obsolete] - public int MinDocsForCategoricalSplit { get; set; } = 100; - - /// - /// Bias for calculating gradient for each feature bin for a categorical feature. - /// - [Obsolete] - public double Bias { get; set; } - - /// - /// Bundle low population bins. Bundle.None(0): no bundling, Bundle.AggregateLowPopulation(1): Bundle low population, Bundle.Adjacent(2): Neighbor low population bundle. - /// - [Obsolete] - public Bundle Bundling { get; set; } = Bundle.None; - - /// - /// Maximum number of distinct values (bins) per feature - /// - [Obsolete] - public int MaxBins { get; set; } = 255; - - /// - /// Sparsity level needed to use sparse feature representation - /// - [Obsolete] - public double SparsifyThreshold { get; set; } = 0.7d; - - /// - /// The feature first use penalty coefficient - /// - [Obsolete] - public double FeatureFirstUsePenalty { get; set; } - - /// - /// The feature re-use penalty (regularization) coefficient - /// - [Obsolete] - public double FeatureReusePenalty { get; set; } - - /// - /// Tree fitting gain confidence requirement (should be in the range [0,1) ). 
- /// - [Obsolete] - public double GainConfidenceLevel { get; set; } - - /// - /// The temperature of the randomized softmax distribution for choosing the feature - /// - [Obsolete] - public double SoftmaxTemperature { get; set; } - - /// - /// Print execution time breakdown to stdout - /// - [Obsolete] - public bool ExecutionTimes { get; set; } = false; - - /// - /// The max number of leaves in each regression tree - /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] - [Obsolete] - public int NumLeaves { get; set; } = 20; - - /// - /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data - /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[]{1, 10, 50})] - [Obsolete] - public int MinDocumentsInLeafs { get; set; } = 10; - - /// - /// Total number of decision trees to create in the ensemble - /// - [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[]{20, 100, 500})] - [Obsolete] - public int NumTrees { get; set; } = 100; - - /// - /// The fraction of features (chosen randomly) to use on each iteration - /// - [Obsolete] - public double FeatureFraction { get; set; } = 0.7d; - - /// - /// Number of trees in each bag (0 for disabling bagging) - /// - [Obsolete] - public int BaggingSize { get; set; } = 1; - - /// - /// Percentage of training examples used in each bag - /// - [Obsolete] - public double BaggingTrainFraction { get; set; } = 0.7d; - - /// - /// The fraction of features (chosen randomly) to use on each split - /// - [Obsolete] - public double SplitFraction { get; set; } = 0.7d; - - /// - /// Smoothing paramter for tree regularization - /// - [Obsolete] - public double Smoothing { get; set; } - - /// - /// When a root split is impossible, allow training to proceed - /// - [Obsolete] - public bool AllowEmptyTrees { get; set; } = true; - - /// - /// The level of feature compression to use - /// - [Obsolete] - public int FeatureCompressionLevel { get; set; } = 1; - - /// - /// Compress the tree Ensemble - /// - [Obsolete] - public bool CompressEnsemble { get; set; } = false; - - /// - /// Maximum Number of trees after compression - /// - [Obsolete] - public int MaxTreesAfterCompression { get; set; } = -1; - - /// - /// Print metrics graph for the first test set - /// - [Obsolete] - public bool PrintTestGraph { get; set; } = false; - - /// - /// Print Train and Validation metrics in graph - /// - [Obsolete] - public bool PrintTrainValidGraph { get; set; } = false; - - /// - /// Calculate metric values for train/valid/test every k rounds - /// - [Obsolete] - public int TestFrequency { get; set; } = 2147483647; - - /// - /// Column to use for example groupId - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional GroupIdColumn { get; set; } - - /// - /// Column to use for example weight - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional WeightColumn { get; set; } - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner 
should cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IRegressionOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(FastForestRegressor)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new FastForestRegressorPipelineStep(output); - } - - [Obsolete] - private class FastForestRegressorPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public FastForestRegressorPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Trainers - { - [Obsolete] - public enum BoostedTreeArgsOptimizationAlgorithmType - { - GradientDescent = 0, - AcceleratedGradientDescent = 1, - ConjugateGradientDescent = 2 - } - - - /// - /// - [Obsolete] - public sealed partial class FastTreeBinaryClassifier : Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithGroupId, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Should we use derivatives optimized for unbalanced sets - /// - [Obsolete] - public bool UnbalancedSets { get; set; } = false; - - /// - /// Use best regression step trees? - /// - [Obsolete] - public bool BestStepRankingRegressionTrees { get; set; } = false; - - /// - /// Should we use line search for a step size - /// - [Obsolete] - public bool UseLineSearch { get; set; } = false; - - /// - /// Number of post-bracket line search steps - /// - [Obsolete] - public int NumPostBracketSteps { get; set; } - - /// - /// Minimum line search step size - /// - [Obsolete] - public double MinStepSize { get; set; } - - /// - /// Optimization algorithm to be used (GradientDescent, AcceleratedGradientDescent) - /// - [Obsolete] - public BoostedTreeArgsOptimizationAlgorithmType OptimizationAlgorithm { get; set; } = BoostedTreeArgsOptimizationAlgorithmType.GradientDescent; - - /// - /// Early stopping rule. (Validation set (/valid) is required.) - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public EarlyStoppingCriterion EarlyStoppingRule { get; set; } - - /// - /// Early stopping metrics. (For regression, 1: L1, 2:L2; for ranking, 1:NDCG@1, 3:NDCG@3) - /// - [Obsolete] - public int EarlyStoppingMetrics { get; set; } - - /// - /// Enable post-training pruning to avoid overfitting. 
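Both FastForest entry points removed above (binary classification and regression) have direct counterparts on the trainer catalogs when the Microsoft.ML.FastTree package is referenced. A hedged sketch with assumed parameter names, reusing `mlContext` from the earlier sketches and the removed defaults (100 trees, 20 leaves, 10 documents per leaf):

```
// Binary-classification counterpart of the removed FastForestBinaryClassifier.
var fastForestBinary = mlContext.BinaryClassification.Trainers.FastForest(
    labelColumnName: "Label",
    featureColumnName: "Features",
    numberOfTrees: 100,
    numberOfLeaves: 20,
    minimumExampleCountPerLeaf: 10);

// Regression counterpart of the removed FastForestRegressor.
var fastForestRegression = mlContext.Regression.Trainers.FastForest(
    labelColumnName: "Label",
    featureColumnName: "Features",
    numberOfTrees: 100,
    numberOfLeaves: 20,
    minimumExampleCountPerLeaf: 10);
```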
(a validation set is required) - /// - [Obsolete] - public bool EnablePruning { get; set; } = false; - - /// - /// Use window and tolerance for pruning - /// - [Obsolete] - public bool UseTolerantPruning { get; set; } = false; - - /// - /// The tolerance threshold for pruning - /// - [Obsolete] - public double PruningThreshold { get; set; } = 0.004d; - - /// - /// The moving window size for pruning - /// - [Obsolete] - public int PruningWindowSize { get; set; } = 5; - - /// - /// The learning rate - /// - [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale:true)] - [Obsolete] - public double LearningRates { get; set; } = 0.2d; - - /// - /// Shrinkage - /// - [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale:true)] - [Obsolete] - public double Shrinkage { get; set; } = 1d; - - /// - /// Dropout rate for tree regularization - /// - [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[]{0f, 1E-09f, 0.05f, 0.1f, 0.2f})] - [Obsolete] - public double DropoutRate { get; set; } - - /// - /// Sample each query 1 in k times in the GetDerivatives function - /// - [Obsolete] - public int GetDerivativesSampleRate { get; set; } = 1; - - /// - /// Write the last ensemble instead of the one determined by early stopping - /// - [Obsolete] - public bool WriteLastEnsemble { get; set; } = false; - - /// - /// Upper bound on absolute value of single tree output - /// - [Obsolete] - public double MaxTreeOutput { get; set; } = 100d; - - /// - /// Training starts from random ordering (determined by /r1) - /// - [Obsolete] - public bool RandomStart { get; set; } = false; - - /// - /// Filter zero lambdas during training - /// - [Obsolete] - public bool FilterZeroLambdas { get; set; } = false; - - /// - /// Freeform defining the scores that should be used as the baseline ranker - /// - [Obsolete] - public string BaselineScoresFormula { get; set; } - - /// - /// Baseline alpha for tradeoffs of risk (0 is normal training) - /// - [Obsolete] - public string BaselineAlphaRisk { get; set; } - - /// - /// The discount freeform which specifies the per position discounts of documents in a query (uses a single variable P for position where P=0 is first position) - /// - [Obsolete] - public string PositionDiscountFreeform { get; set; } - - /// - /// Allows to choose Parallel FastTree Learning Algorithm - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public ParallelTraining ParallelTrainer { get; set; } = new SingleParallelTraining(); - - /// - /// The number of threads to use - /// - [Obsolete] - public int? NumThreads { get; set; } - - /// - /// The seed of the random number generator - /// - [Obsolete] - public int RngSeed { get; set; } = 123; - - /// - /// The seed of the active feature selection - /// - [Obsolete] - public int FeatureSelectSeed { get; set; } = 123; - - /// - /// The entropy (regularization) coefficient between 0 and 1 - /// - [Obsolete] - public double EntropyCoefficient { get; set; } - - /// - /// The number of histograms in the pool (between 2 and numLeaves) - /// - [Obsolete] - public int HistogramPoolSize { get; set; } = -1; - - /// - /// Whether to utilize the disk or the data's native transposition facilities (where applicable) when performing the transpose - /// - [Obsolete] - public bool? 
DiskTranspose { get; set; } - - /// - /// Whether to collectivize features during dataset preparation to speed up training - /// - [Obsolete] - public bool FeatureFlocks { get; set; } = true; - - /// - /// Whether to do split based on multiple categorical feature values. - /// - [Obsolete] - public bool CategoricalSplit { get; set; } = false; - - /// - /// Maximum categorical split groups to consider when splitting on a categorical feature. Split groups are a collection of split points. This is used to reduce overfitting when there many categorical features. - /// - [Obsolete] - public int MaxCategoricalGroupsPerNode { get; set; } = 64; - - /// - /// Maximum categorical split points to consider when splitting on a categorical feature. - /// - [Obsolete] - public int MaxCategoricalSplitPoints { get; set; } = 64; - - /// - /// Minimum categorical docs percentage in a bin to consider for a split. - /// - [Obsolete] - public double MinDocsPercentageForCategoricalSplit { get; set; } = 0.001d; - - /// - /// Minimum categorical doc count in a bin to consider for a split. - /// - [Obsolete] - public int MinDocsForCategoricalSplit { get; set; } = 100; - - /// - /// Bias for calculating gradient for each feature bin for a categorical feature. - /// - [Obsolete] - public double Bias { get; set; } - - /// - /// Bundle low population bins. Bundle.None(0): no bundling, Bundle.AggregateLowPopulation(1): Bundle low population, Bundle.Adjacent(2): Neighbor low population bundle. - /// - [Obsolete] - public Bundle Bundling { get; set; } = Bundle.None; - - /// - /// Maximum number of distinct values (bins) per feature - /// - [Obsolete] - public int MaxBins { get; set; } = 255; - - /// - /// Sparsity level needed to use sparse feature representation - /// - [Obsolete] - public double SparsifyThreshold { get; set; } = 0.7d; - - /// - /// The feature first use penalty coefficient - /// - [Obsolete] - public double FeatureFirstUsePenalty { get; set; } - - /// - /// The feature re-use penalty (regularization) coefficient - /// - [Obsolete] - public double FeatureReusePenalty { get; set; } - - /// - /// Tree fitting gain confidence requirement (should be in the range [0,1) ). 
- /// - [Obsolete] - public double GainConfidenceLevel { get; set; } - - /// - /// The temperature of the randomized softmax distribution for choosing the feature - /// - [Obsolete] - public double SoftmaxTemperature { get; set; } - - /// - /// Print execution time breakdown to stdout - /// - [Obsolete] - public bool ExecutionTimes { get; set; } = false; - - /// - /// The max number of leaves in each regression tree - /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] - [Obsolete] - public int NumLeaves { get; set; } = 20; - - /// - /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data - /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[]{1, 10, 50})] - [Obsolete] - public int MinDocumentsInLeafs { get; set; } = 10; - - /// - /// Total number of decision trees to create in the ensemble - /// - [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[]{20, 100, 500})] - [Obsolete] - public int NumTrees { get; set; } = 100; - - /// - /// The fraction of features (chosen randomly) to use on each iteration - /// - [Obsolete] - public double FeatureFraction { get; set; } = 1d; - - /// - /// Number of trees in each bag (0 for disabling bagging) - /// - [Obsolete] - public int BaggingSize { get; set; } - - /// - /// Percentage of training examples used in each bag - /// - [Obsolete] - public double BaggingTrainFraction { get; set; } = 0.7d; - - /// - /// The fraction of features (chosen randomly) to use on each split - /// - [Obsolete] - public double SplitFraction { get; set; } = 1d; - - /// - /// Smoothing paramter for tree regularization - /// - [Obsolete] - public double Smoothing { get; set; } - - /// - /// When a root split is impossible, allow training to proceed - /// - [Obsolete] - public bool AllowEmptyTrees { get; set; } = true; - - /// - /// The level of feature compression to use - /// - [Obsolete] - public int FeatureCompressionLevel { get; set; } = 1; - - /// - /// Compress the tree Ensemble - /// - [Obsolete] - public bool CompressEnsemble { get; set; } = false; - - /// - /// Maximum Number of trees after compression - /// - [Obsolete] - public int MaxTreesAfterCompression { get; set; } = -1; - - /// - /// Print metrics graph for the first test set - /// - [Obsolete] - public bool PrintTestGraph { get; set; } = false; - - /// - /// Print Train and Validation metrics in graph - /// - [Obsolete] - public bool PrintTrainValidGraph { get; set; } = false; - - /// - /// Calculate metric values for train/valid/test every k rounds - /// - [Obsolete] - public int TestFrequency { get; set; } = 2147483647; - - /// - /// Column to use for example groupId - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional GroupIdColumn { get; set; } - - /// - /// Column to use for example weight - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional WeightColumn { get; set; } - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should 
cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IBinaryClassificationOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(FastTreeBinaryClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new FastTreeBinaryClassifierPipelineStep(output); - } - - [Obsolete] - private class FastTreeBinaryClassifierPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public FastTreeBinaryClassifierPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Trainers - { - - /// - /// - [Obsolete] - public sealed partial class FastTreeRanker : Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithGroupId, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Comma seperated list of gains associated to each relevance label. - /// - [Obsolete] - public string CustomGains { get; set; } = "0,3,7,15,31"; - - /// - /// Train DCG instead of NDCG - /// - [Obsolete] - public bool TrainDcg { get; set; } = false; - - /// - /// The sorting algorithm to use for DCG and LambdaMart calculations [DescendingStablePessimistic/DescendingStable/DescendingReverse/DescendingDotNet] - /// - [Obsolete] - public string SortingAlgorithm { get; set; } = "DescendingStablePessimistic"; - - /// - /// max-NDCG truncation to use in the Lambda Mart algorithm - /// - [Obsolete] - public int LambdaMartMaxTruncation { get; set; } = 100; - - /// - /// Use shifted NDCG - /// - [Obsolete] - public bool ShiftedNdcg { get; set; } = false; - - /// - /// Cost function parameter (w/c) - /// - [Obsolete] - public char CostFunctionParam { get; set; } = 'w'; - - /// - /// Distance weight 2 adjustment to cost - /// - [Obsolete] - public bool DistanceWeight2 { get; set; } = false; - - /// - /// Normalize query lambdas - /// - [Obsolete] - public bool NormalizeQueryLambdas { get; set; } = false; - - /// - /// Use best regression step trees? 
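The FastTreeBinaryClassifier removal above likewise corresponds to the FastTree trainer on the binary-classification catalog, and a ranking counterpart exists for the FastTreeRanker whose removal follows. Again a hedged sketch reusing `mlContext`; the parameter names (including `rowGroupColumnName`) are assumed from later releases:

```
// Counterpart of FastTreeBinaryClassifier; learning rate 0.2 matches the removed default.
var fastTreeBinary = mlContext.BinaryClassification.Trainers.FastTree(
    labelColumnName: "Label",
    featureColumnName: "Features",
    numberOfTrees: 100,
    numberOfLeaves: 20,
    minimumExampleCountPerLeaf: 10,
    learningRate: 0.2);

// Ranking counterpart of FastTreeRanker; rowGroupColumnName names the query/group id column.
var fastTreeRanker = mlContext.Ranking.Trainers.FastTree(
    labelColumnName: "Label",
    featureColumnName: "Features",
    rowGroupColumnName: "GroupId",
    numberOfTrees: 100,
    numberOfLeaves: 20,
    minimumExampleCountPerLeaf: 10,
    learningRate: 0.2);
```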
- /// - [Obsolete] - public bool BestStepRankingRegressionTrees { get; set; } = false; - - /// - /// Should we use line search for a step size - /// - [Obsolete] - public bool UseLineSearch { get; set; } = false; - - /// - /// Number of post-bracket line search steps - /// - [Obsolete] - public int NumPostBracketSteps { get; set; } - - /// - /// Minimum line search step size - /// - [Obsolete] - public double MinStepSize { get; set; } - - /// - /// Optimization algorithm to be used (GradientDescent, AcceleratedGradientDescent) - /// - [Obsolete] - public BoostedTreeArgsOptimizationAlgorithmType OptimizationAlgorithm { get; set; } = BoostedTreeArgsOptimizationAlgorithmType.GradientDescent; - - /// - /// Early stopping rule. (Validation set (/valid) is required.) - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public EarlyStoppingCriterion EarlyStoppingRule { get; set; } - - /// - /// Early stopping metrics. (For regression, 1: L1, 2:L2; for ranking, 1:NDCG@1, 3:NDCG@3) - /// - [Obsolete] - public int EarlyStoppingMetrics { get; set; } = 1; - - /// - /// Enable post-training pruning to avoid overfitting. (a validation set is required) - /// - [Obsolete] - public bool EnablePruning { get; set; } = false; - - /// - /// Use window and tolerance for pruning - /// - [Obsolete] - public bool UseTolerantPruning { get; set; } = false; - - /// - /// The tolerance threshold for pruning - /// - [Obsolete] - public double PruningThreshold { get; set; } = 0.004d; - - /// - /// The moving window size for pruning - /// - [Obsolete] - public int PruningWindowSize { get; set; } = 5; - - /// - /// The learning rate - /// - [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale:true)] - [Obsolete] - public double LearningRates { get; set; } = 0.2d; - - /// - /// Shrinkage - /// - [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale:true)] - [Obsolete] - public double Shrinkage { get; set; } = 1d; - - /// - /// Dropout rate for tree regularization - /// - [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[]{0f, 1E-09f, 0.05f, 0.1f, 0.2f})] - [Obsolete] - public double DropoutRate { get; set; } - - /// - /// Sample each query 1 in k times in the GetDerivatives function - /// - [Obsolete] - public int GetDerivativesSampleRate { get; set; } = 1; - - /// - /// Write the last ensemble instead of the one determined by early stopping - /// - [Obsolete] - public bool WriteLastEnsemble { get; set; } = false; - - /// - /// Upper bound on absolute value of single tree output - /// - [Obsolete] - public double MaxTreeOutput { get; set; } = 100d; - - /// - /// Training starts from random ordering (determined by /r1) - /// - [Obsolete] - public bool RandomStart { get; set; } = false; - - /// - /// Filter zero lambdas during training - /// - [Obsolete] - public bool FilterZeroLambdas { get; set; } = false; - - /// - /// Freeform defining the scores that should be used as the baseline ranker - /// - [Obsolete] - public string BaselineScoresFormula { get; set; } - - /// - /// Baseline alpha for tradeoffs of risk (0 is normal training) - /// - [Obsolete] - public string BaselineAlphaRisk { get; set; } - - /// - /// The discount freeform which specifies the per position discounts of documents in a query (uses a single variable P for position where P=0 is first position) - /// - [Obsolete] - public string PositionDiscountFreeform { get; set; } - - /// - /// Allows to choose Parallel FastTree Learning Algorithm - /// - 
[JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public ParallelTraining ParallelTrainer { get; set; } = new SingleParallelTraining(); - - /// - /// The number of threads to use - /// - [Obsolete] - public int? NumThreads { get; set; } - - /// - /// The seed of the random number generator - /// - [Obsolete] - public int RngSeed { get; set; } = 123; - - /// - /// The seed of the active feature selection - /// - [Obsolete] - public int FeatureSelectSeed { get; set; } = 123; - - /// - /// The entropy (regularization) coefficient between 0 and 1 - /// - [Obsolete] - public double EntropyCoefficient { get; set; } - - /// - /// The number of histograms in the pool (between 2 and numLeaves) - /// - [Obsolete] - public int HistogramPoolSize { get; set; } = -1; - - /// - /// Whether to utilize the disk or the data's native transposition facilities (where applicable) when performing the transpose - /// - [Obsolete] - public bool? DiskTranspose { get; set; } - - /// - /// Whether to collectivize features during dataset preparation to speed up training - /// - [Obsolete] - public bool FeatureFlocks { get; set; } = true; - - /// - /// Whether to do split based on multiple categorical feature values. - /// - [Obsolete] - public bool CategoricalSplit { get; set; } = false; - - /// - /// Maximum categorical split groups to consider when splitting on a categorical feature. Split groups are a collection of split points. This is used to reduce overfitting when there many categorical features. - /// - [Obsolete] - public int MaxCategoricalGroupsPerNode { get; set; } = 64; - - /// - /// Maximum categorical split points to consider when splitting on a categorical feature. - /// - [Obsolete] - public int MaxCategoricalSplitPoints { get; set; } = 64; - - /// - /// Minimum categorical docs percentage in a bin to consider for a split. - /// - [Obsolete] - public double MinDocsPercentageForCategoricalSplit { get; set; } = 0.001d; - - /// - /// Minimum categorical doc count in a bin to consider for a split. - /// - [Obsolete] - public int MinDocsForCategoricalSplit { get; set; } = 100; - - /// - /// Bias for calculating gradient for each feature bin for a categorical feature. - /// - [Obsolete] - public double Bias { get; set; } - - /// - /// Bundle low population bins. Bundle.None(0): no bundling, Bundle.AggregateLowPopulation(1): Bundle low population, Bundle.Adjacent(2): Neighbor low population bundle. - /// - [Obsolete] - public Bundle Bundling { get; set; } = Bundle.None; - - /// - /// Maximum number of distinct values (bins) per feature - /// - [Obsolete] - public int MaxBins { get; set; } = 255; - - /// - /// Sparsity level needed to use sparse feature representation - /// - [Obsolete] - public double SparsifyThreshold { get; set; } = 0.7d; - - /// - /// The feature first use penalty coefficient - /// - [Obsolete] - public double FeatureFirstUsePenalty { get; set; } - - /// - /// The feature re-use penalty (regularization) coefficient - /// - [Obsolete] - public double FeatureReusePenalty { get; set; } - - /// - /// Tree fitting gain confidence requirement (should be in the range [0,1) ). 
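The options above belong to the Legacy `FastTreeRanker` entry point that this change deletes. For readers tracking the removal, here is a minimal sketch of how the same trainer is reached through the `MLContext` catalog that replaces the Legacy pipeline; the method and parameter names follow the later stable (1.x) API, and the column names "Label", "Features", and "GroupId" are assumptions, so the exact 0.9-era spellings may differ.

```
// Sketch only: the MLContext-based FastTree ranking trainer that supersedes
// the Legacy FastTreeRanker. Names and column choices below are assumptions.
using Microsoft.ML;

var mlContext = new MLContext(seed: 123);
IDataView trainData = /* ranking data with Label, Features and GroupId columns */ null;

var trainer = mlContext.Ranking.Trainers.FastTree(
    labelColumnName: "Label",
    featureColumnName: "Features",
    rowGroupColumnName: "GroupId",
    numberOfLeaves: 20,              // Legacy NumLeaves
    numberOfTrees: 100,              // Legacy NumTrees
    minimumExampleCountPerLeaf: 10,  // Legacy MinDocumentsInLeafs
    learningRate: 0.2);              // Legacy LearningRates

var model = trainer.Fit(trainData);
```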
- /// - [Obsolete] - public double GainConfidenceLevel { get; set; } - - /// - /// The temperature of the randomized softmax distribution for choosing the feature - /// - [Obsolete] - public double SoftmaxTemperature { get; set; } - - /// - /// Print execution time breakdown to stdout - /// - [Obsolete] - public bool ExecutionTimes { get; set; } = false; - - /// - /// The max number of leaves in each regression tree - /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] - [Obsolete] - public int NumLeaves { get; set; } = 20; - - /// - /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data - /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[]{1, 10, 50})] - [Obsolete] - public int MinDocumentsInLeafs { get; set; } = 10; - - /// - /// Total number of decision trees to create in the ensemble - /// - [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[]{20, 100, 500})] - [Obsolete] - public int NumTrees { get; set; } = 100; - - /// - /// The fraction of features (chosen randomly) to use on each iteration - /// - [Obsolete] - public double FeatureFraction { get; set; } = 1d; - - /// - /// Number of trees in each bag (0 for disabling bagging) - /// - [Obsolete] - public int BaggingSize { get; set; } - - /// - /// Percentage of training examples used in each bag - /// - [Obsolete] - public double BaggingTrainFraction { get; set; } = 0.7d; - - /// - /// The fraction of features (chosen randomly) to use on each split - /// - [Obsolete] - public double SplitFraction { get; set; } = 1d; - - /// - /// Smoothing paramter for tree regularization - /// - [Obsolete] - public double Smoothing { get; set; } - - /// - /// When a root split is impossible, allow training to proceed - /// - [Obsolete] - public bool AllowEmptyTrees { get; set; } = true; - - /// - /// The level of feature compression to use - /// - [Obsolete] - public int FeatureCompressionLevel { get; set; } = 1; - - /// - /// Compress the tree Ensemble - /// - [Obsolete] - public bool CompressEnsemble { get; set; } = false; - - /// - /// Maximum Number of trees after compression - /// - [Obsolete] - public int MaxTreesAfterCompression { get; set; } = -1; - - /// - /// Print metrics graph for the first test set - /// - [Obsolete] - public bool PrintTestGraph { get; set; } = false; - - /// - /// Print Train and Validation metrics in graph - /// - [Obsolete] - public bool PrintTrainValidGraph { get; set; } = false; - - /// - /// Calculate metric values for train/valid/test every k rounds - /// - [Obsolete] - public int TestFrequency { get; set; } = 2147483647; - - /// - /// Column to use for example groupId - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional GroupIdColumn { get; set; } - - /// - /// Column to use for example weight - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional WeightColumn { get; set; } - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should 
cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IRankingOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(FastTreeRanker)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new FastTreeRankerPipelineStep(output); - } - - [Obsolete] - private class FastTreeRankerPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public FastTreeRankerPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Trainers - { - - /// - /// - [Obsolete] - public sealed partial class FastTreeRegressor : Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithGroupId, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Use best regression step trees? - /// - [Obsolete] - public bool BestStepRankingRegressionTrees { get; set; } = false; - - /// - /// Should we use line search for a step size - /// - [Obsolete] - public bool UseLineSearch { get; set; } = false; - - /// - /// Number of post-bracket line search steps - /// - [Obsolete] - public int NumPostBracketSteps { get; set; } - - /// - /// Minimum line search step size - /// - [Obsolete] - public double MinStepSize { get; set; } - - /// - /// Optimization algorithm to be used (GradientDescent, AcceleratedGradientDescent) - /// - [Obsolete] - public BoostedTreeArgsOptimizationAlgorithmType OptimizationAlgorithm { get; set; } = BoostedTreeArgsOptimizationAlgorithmType.GradientDescent; - - /// - /// Early stopping rule. (Validation set (/valid) is required.) - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public EarlyStoppingCriterion EarlyStoppingRule { get; set; } - - /// - /// Early stopping metrics. (For regression, 1: L1, 2:L2; for ranking, 1:NDCG@1, 3:NDCG@3) - /// - [Obsolete] - public int EarlyStoppingMetrics { get; set; } = 1; - - /// - /// Enable post-training pruning to avoid overfitting. 
(a validation set is required) - /// - [Obsolete] - public bool EnablePruning { get; set; } = false; - - /// - /// Use window and tolerance for pruning - /// - [Obsolete] - public bool UseTolerantPruning { get; set; } = false; - - /// - /// The tolerance threshold for pruning - /// - [Obsolete] - public double PruningThreshold { get; set; } = 0.004d; - - /// - /// The moving window size for pruning - /// - [Obsolete] - public int PruningWindowSize { get; set; } = 5; - - /// - /// The learning rate - /// - [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale:true)] - [Obsolete] - public double LearningRates { get; set; } = 0.2d; - - /// - /// Shrinkage - /// - [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale:true)] - [Obsolete] - public double Shrinkage { get; set; } = 1d; - - /// - /// Dropout rate for tree regularization - /// - [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[]{0f, 1E-09f, 0.05f, 0.1f, 0.2f})] - [Obsolete] - public double DropoutRate { get; set; } - - /// - /// Sample each query 1 in k times in the GetDerivatives function - /// - [Obsolete] - public int GetDerivativesSampleRate { get; set; } = 1; - - /// - /// Write the last ensemble instead of the one determined by early stopping - /// - [Obsolete] - public bool WriteLastEnsemble { get; set; } = false; - - /// - /// Upper bound on absolute value of single tree output - /// - [Obsolete] - public double MaxTreeOutput { get; set; } = 100d; - - /// - /// Training starts from random ordering (determined by /r1) - /// - [Obsolete] - public bool RandomStart { get; set; } = false; - - /// - /// Filter zero lambdas during training - /// - [Obsolete] - public bool FilterZeroLambdas { get; set; } = false; - - /// - /// Freeform defining the scores that should be used as the baseline ranker - /// - [Obsolete] - public string BaselineScoresFormula { get; set; } - - /// - /// Baseline alpha for tradeoffs of risk (0 is normal training) - /// - [Obsolete] - public string BaselineAlphaRisk { get; set; } - - /// - /// The discount freeform which specifies the per position discounts of documents in a query (uses a single variable P for position where P=0 is first position) - /// - [Obsolete] - public string PositionDiscountFreeform { get; set; } - - /// - /// Allows to choose Parallel FastTree Learning Algorithm - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public ParallelTraining ParallelTrainer { get; set; } = new SingleParallelTraining(); - - /// - /// The number of threads to use - /// - [Obsolete] - public int? NumThreads { get; set; } - - /// - /// The seed of the random number generator - /// - [Obsolete] - public int RngSeed { get; set; } = 123; - - /// - /// The seed of the active feature selection - /// - [Obsolete] - public int FeatureSelectSeed { get; set; } = 123; - - /// - /// The entropy (regularization) coefficient between 0 and 1 - /// - [Obsolete] - public double EntropyCoefficient { get; set; } - - /// - /// The number of histograms in the pool (between 2 and numLeaves) - /// - [Obsolete] - public int HistogramPoolSize { get; set; } = -1; - - /// - /// Whether to utilize the disk or the data's native transposition facilities (where applicable) when performing the transpose - /// - [Obsolete] - public bool? 
DiskTranspose { get; set; } - - /// - /// Whether to collectivize features during dataset preparation to speed up training - /// - [Obsolete] - public bool FeatureFlocks { get; set; } = true; - - /// - /// Whether to do split based on multiple categorical feature values. - /// - [Obsolete] - public bool CategoricalSplit { get; set; } = false; - - /// - /// Maximum categorical split groups to consider when splitting on a categorical feature. Split groups are a collection of split points. This is used to reduce overfitting when there many categorical features. - /// - [Obsolete] - public int MaxCategoricalGroupsPerNode { get; set; } = 64; - - /// - /// Maximum categorical split points to consider when splitting on a categorical feature. - /// - [Obsolete] - public int MaxCategoricalSplitPoints { get; set; } = 64; - - /// - /// Minimum categorical docs percentage in a bin to consider for a split. - /// - [Obsolete] - public double MinDocsPercentageForCategoricalSplit { get; set; } = 0.001d; - - /// - /// Minimum categorical doc count in a bin to consider for a split. - /// - [Obsolete] - public int MinDocsForCategoricalSplit { get; set; } = 100; - - /// - /// Bias for calculating gradient for each feature bin for a categorical feature. - /// - [Obsolete] - public double Bias { get; set; } - - /// - /// Bundle low population bins. Bundle.None(0): no bundling, Bundle.AggregateLowPopulation(1): Bundle low population, Bundle.Adjacent(2): Neighbor low population bundle. - /// - [Obsolete] - public Bundle Bundling { get; set; } = Bundle.None; - - /// - /// Maximum number of distinct values (bins) per feature - /// - [Obsolete] - public int MaxBins { get; set; } = 255; - - /// - /// Sparsity level needed to use sparse feature representation - /// - [Obsolete] - public double SparsifyThreshold { get; set; } = 0.7d; - - /// - /// The feature first use penalty coefficient - /// - [Obsolete] - public double FeatureFirstUsePenalty { get; set; } - - /// - /// The feature re-use penalty (regularization) coefficient - /// - [Obsolete] - public double FeatureReusePenalty { get; set; } - - /// - /// Tree fitting gain confidence requirement (should be in the range [0,1) ). 
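The block above repeats the same boosted-tree options on the Legacy `FastTreeRegressor` that is being removed. A comparable call through the replacement catalog, inside a small pipeline, looks roughly like the sketch below; the 1.x-style names and the hypothetical input columns `Feature1` and `Feature2` are assumptions.

```
// Sketch only: FastTree regression via the MLContext catalog.
using Microsoft.ML;

var mlContext = new MLContext();

// "Feature1" and "Feature2" are placeholder column names for illustration.
var pipeline = mlContext.Transforms.Concatenate("Features", "Feature1", "Feature2")
    .Append(mlContext.Regression.Trainers.FastTree(
        labelColumnName: "Label",
        featureColumnName: "Features",
        numberOfLeaves: 20,
        numberOfTrees: 100,
        minimumExampleCountPerLeaf: 10,
        learningRate: 0.2));
```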
- /// - [Obsolete] - public double GainConfidenceLevel { get; set; } - - /// - /// The temperature of the randomized softmax distribution for choosing the feature - /// - [Obsolete] - public double SoftmaxTemperature { get; set; } - - /// - /// Print execution time breakdown to stdout - /// - [Obsolete] - public bool ExecutionTimes { get; set; } = false; - - /// - /// The max number of leaves in each regression tree - /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] - [Obsolete] - public int NumLeaves { get; set; } = 20; - - /// - /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data - /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[]{1, 10, 50})] - [Obsolete] - public int MinDocumentsInLeafs { get; set; } = 10; - - /// - /// Total number of decision trees to create in the ensemble - /// - [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[]{20, 100, 500})] - [Obsolete] - public int NumTrees { get; set; } = 100; - - /// - /// The fraction of features (chosen randomly) to use on each iteration - /// - [Obsolete] - public double FeatureFraction { get; set; } = 1d; - - /// - /// Number of trees in each bag (0 for disabling bagging) - /// - [Obsolete] - public int BaggingSize { get; set; } - - /// - /// Percentage of training examples used in each bag - /// - [Obsolete] - public double BaggingTrainFraction { get; set; } = 0.7d; - - /// - /// The fraction of features (chosen randomly) to use on each split - /// - [Obsolete] - public double SplitFraction { get; set; } = 1d; - - /// - /// Smoothing paramter for tree regularization - /// - [Obsolete] - public double Smoothing { get; set; } - - /// - /// When a root split is impossible, allow training to proceed - /// - [Obsolete] - public bool AllowEmptyTrees { get; set; } = true; - - /// - /// The level of feature compression to use - /// - [Obsolete] - public int FeatureCompressionLevel { get; set; } = 1; - - /// - /// Compress the tree Ensemble - /// - [Obsolete] - public bool CompressEnsemble { get; set; } = false; - - /// - /// Maximum Number of trees after compression - /// - [Obsolete] - public int MaxTreesAfterCompression { get; set; } = -1; - - /// - /// Print metrics graph for the first test set - /// - [Obsolete] - public bool PrintTestGraph { get; set; } = false; - - /// - /// Print Train and Validation metrics in graph - /// - [Obsolete] - public bool PrintTrainValidGraph { get; set; } = false; - - /// - /// Calculate metric values for train/valid/test every k rounds - /// - [Obsolete] - public int TestFrequency { get; set; } = 2147483647; - - /// - /// Column to use for example groupId - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional GroupIdColumn { get; set; } - - /// - /// Column to use for example weight - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional WeightColumn { get; set; } - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should 
cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IRegressionOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(FastTreeRegressor)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new FastTreeRegressorPipelineStep(output); - } - - [Obsolete] - private class FastTreeRegressorPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public FastTreeRegressorPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Trainers - { - - /// - [Obsolete] - public sealed partial class FastTreeTweedieRegressor : Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithGroupId, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Index parameter for the Tweedie distribution, in the range [1, 2]. 1 is Poisson loss, 2 is gamma loss, and intermediate values are compound Poisson loss. - /// - [Obsolete] - public double Index { get; set; } = 1.5d; - - /// - /// Use best regression step trees? - /// - [Obsolete] - public bool BestStepRankingRegressionTrees { get; set; } = false; - - /// - /// Should we use line search for a step size - /// - [Obsolete] - public bool UseLineSearch { get; set; } = false; - - /// - /// Number of post-bracket line search steps - /// - [Obsolete] - public int NumPostBracketSteps { get; set; } - - /// - /// Minimum line search step size - /// - [Obsolete] - public double MinStepSize { get; set; } - - /// - /// Optimization algorithm to be used (GradientDescent, AcceleratedGradientDescent) - /// - [Obsolete] - public BoostedTreeArgsOptimizationAlgorithmType OptimizationAlgorithm { get; set; } = BoostedTreeArgsOptimizationAlgorithmType.GradientDescent; - - /// - /// Early stopping rule. (Validation set (/valid) is required.) - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public EarlyStoppingCriterion EarlyStoppingRule { get; set; } - - /// - /// Early stopping metrics. (For regression, 1: L1, 2:L2; for ranking, 1:NDCG@1, 3:NDCG@3) - /// - [Obsolete] - public int EarlyStoppingMetrics { get; set; } - - /// - /// Enable post-training pruning to avoid overfitting. 
(a validation set is required) - /// - [Obsolete] - public bool EnablePruning { get; set; } = false; - - /// - /// Use window and tolerance for pruning - /// - [Obsolete] - public bool UseTolerantPruning { get; set; } = false; - - /// - /// The tolerance threshold for pruning - /// - [Obsolete] - public double PruningThreshold { get; set; } = 0.004d; - - /// - /// The moving window size for pruning - /// - [Obsolete] - public int PruningWindowSize { get; set; } = 5; - - /// - /// The learning rate - /// - [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale:true)] - [Obsolete] - public double LearningRates { get; set; } = 0.2d; - - /// - /// Shrinkage - /// - [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale:true)] - [Obsolete] - public double Shrinkage { get; set; } = 1d; - - /// - /// Dropout rate for tree regularization - /// - [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[]{0f, 1E-09f, 0.05f, 0.1f, 0.2f})] - [Obsolete] - public double DropoutRate { get; set; } - - /// - /// Sample each query 1 in k times in the GetDerivatives function - /// - [Obsolete] - public int GetDerivativesSampleRate { get; set; } = 1; - - /// - /// Write the last ensemble instead of the one determined by early stopping - /// - [Obsolete] - public bool WriteLastEnsemble { get; set; } = false; - - /// - /// Upper bound on absolute value of single tree output - /// - [Obsolete] - public double MaxTreeOutput { get; set; } = 100d; - - /// - /// Training starts from random ordering (determined by /r1) - /// - [Obsolete] - public bool RandomStart { get; set; } = false; - - /// - /// Filter zero lambdas during training - /// - [Obsolete] - public bool FilterZeroLambdas { get; set; } = false; - - /// - /// Freeform defining the scores that should be used as the baseline ranker - /// - [Obsolete] - public string BaselineScoresFormula { get; set; } - - /// - /// Baseline alpha for tradeoffs of risk (0 is normal training) - /// - [Obsolete] - public string BaselineAlphaRisk { get; set; } - - /// - /// The discount freeform which specifies the per position discounts of documents in a query (uses a single variable P for position where P=0 is first position) - /// - [Obsolete] - public string PositionDiscountFreeform { get; set; } - - /// - /// Allows to choose Parallel FastTree Learning Algorithm - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public ParallelTraining ParallelTrainer { get; set; } = new SingleParallelTraining(); - - /// - /// The number of threads to use - /// - [Obsolete] - public int? NumThreads { get; set; } - - /// - /// The seed of the random number generator - /// - [Obsolete] - public int RngSeed { get; set; } = 123; - - /// - /// The seed of the active feature selection - /// - [Obsolete] - public int FeatureSelectSeed { get; set; } = 123; - - /// - /// The entropy (regularization) coefficient between 0 and 1 - /// - [Obsolete] - public double EntropyCoefficient { get; set; } - - /// - /// The number of histograms in the pool (between 2 and numLeaves) - /// - [Obsolete] - public int HistogramPoolSize { get; set; } = -1; - - /// - /// Whether to utilize the disk or the data's native transposition facilities (where applicable) when performing the transpose - /// - [Obsolete] - public bool? 
DiskTranspose { get; set; } - - /// - /// Whether to collectivize features during dataset preparation to speed up training - /// - [Obsolete] - public bool FeatureFlocks { get; set; } = true; - - /// - /// Whether to do split based on multiple categorical feature values. - /// - [Obsolete] - public bool CategoricalSplit { get; set; } = false; - - /// - /// Maximum categorical split groups to consider when splitting on a categorical feature. Split groups are a collection of split points. This is used to reduce overfitting when there many categorical features. - /// - [Obsolete] - public int MaxCategoricalGroupsPerNode { get; set; } = 64; - - /// - /// Maximum categorical split points to consider when splitting on a categorical feature. - /// - [Obsolete] - public int MaxCategoricalSplitPoints { get; set; } = 64; - - /// - /// Minimum categorical docs percentage in a bin to consider for a split. - /// - [Obsolete] - public double MinDocsPercentageForCategoricalSplit { get; set; } = 0.001d; - - /// - /// Minimum categorical doc count in a bin to consider for a split. - /// - [Obsolete] - public int MinDocsForCategoricalSplit { get; set; } = 100; - - /// - /// Bias for calculating gradient for each feature bin for a categorical feature. - /// - [Obsolete] - public double Bias { get; set; } - - /// - /// Bundle low population bins. Bundle.None(0): no bundling, Bundle.AggregateLowPopulation(1): Bundle low population, Bundle.Adjacent(2): Neighbor low population bundle. - /// - [Obsolete] - public Bundle Bundling { get; set; } = Bundle.None; - - /// - /// Maximum number of distinct values (bins) per feature - /// - [Obsolete] - public int MaxBins { get; set; } = 255; - - /// - /// Sparsity level needed to use sparse feature representation - /// - [Obsolete] - public double SparsifyThreshold { get; set; } = 0.7d; - - /// - /// The feature first use penalty coefficient - /// - [Obsolete] - public double FeatureFirstUsePenalty { get; set; } - - /// - /// The feature re-use penalty (regularization) coefficient - /// - [Obsolete] - public double FeatureReusePenalty { get; set; } - - /// - /// Tree fitting gain confidence requirement (should be in the range [0,1) ). 
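The Legacy `FastTreeTweedieRegressor` above adds a single extra knob to the shared tree options: the Tweedie index (1 is Poisson loss, 2 is gamma loss, and values in between give compound Poisson loss). A hedged sketch of the corresponding catalog call follows; the simple overload exposes the common tree parameters, while the index itself is configured through the trainer's options type, whose exact property name is not shown in this diff.

```
// Sketch only: FastTree Tweedie regression via the MLContext catalog
// (1.x-style method and parameter names assumed).
using Microsoft.ML;

var mlContext = new MLContext();

var tweedie = mlContext.Regression.Trainers.FastTreeTweedie(
    labelColumnName: "Label",
    featureColumnName: "Features",
    numberOfLeaves: 20,
    numberOfTrees: 100,
    minimumExampleCountPerLeaf: 10,
    learningRate: 0.2);
// The Tweedie index (1.5 by default in the Legacy class) moves to the
// trainer's Options object in the replacement API.
```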
- /// - [Obsolete] - public double GainConfidenceLevel { get; set; } - - /// - /// The temperature of the randomized softmax distribution for choosing the feature - /// - [Obsolete] - public double SoftmaxTemperature { get; set; } - - /// - /// Print execution time breakdown to stdout - /// - [Obsolete] - public bool ExecutionTimes { get; set; } = false; - - /// - /// The max number of leaves in each regression tree - /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] - [Obsolete] - public int NumLeaves { get; set; } = 20; - - /// - /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data - /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[]{1, 10, 50})] - [Obsolete] - public int MinDocumentsInLeafs { get; set; } = 10; - - /// - /// Total number of decision trees to create in the ensemble - /// - [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[]{20, 100, 500})] - [Obsolete] - public int NumTrees { get; set; } = 100; - - /// - /// The fraction of features (chosen randomly) to use on each iteration - /// - [Obsolete] - public double FeatureFraction { get; set; } = 1d; - - /// - /// Number of trees in each bag (0 for disabling bagging) - /// - [Obsolete] - public int BaggingSize { get; set; } - - /// - /// Percentage of training examples used in each bag - /// - [Obsolete] - public double BaggingTrainFraction { get; set; } = 0.7d; - - /// - /// The fraction of features (chosen randomly) to use on each split - /// - [Obsolete] - public double SplitFraction { get; set; } = 1d; - - /// - /// Smoothing paramter for tree regularization - /// - [Obsolete] - public double Smoothing { get; set; } - - /// - /// When a root split is impossible, allow training to proceed - /// - [Obsolete] - public bool AllowEmptyTrees { get; set; } = true; - - /// - /// The level of feature compression to use - /// - [Obsolete] - public int FeatureCompressionLevel { get; set; } = 1; - - /// - /// Compress the tree Ensemble - /// - [Obsolete] - public bool CompressEnsemble { get; set; } = false; - - /// - /// Maximum Number of trees after compression - /// - [Obsolete] - public int MaxTreesAfterCompression { get; set; } = -1; - - /// - /// Print metrics graph for the first test set - /// - [Obsolete] - public bool PrintTestGraph { get; set; } = false; - - /// - /// Print Train and Validation metrics in graph - /// - [Obsolete] - public bool PrintTrainValidGraph { get; set; } = false; - - /// - /// Calculate metric values for train/valid/test every k rounds - /// - [Obsolete] - public int TestFrequency { get; set; } = 2147483647; - - /// - /// Column to use for example groupId - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional GroupIdColumn { get; set; } - - /// - /// Column to use for example weight - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional WeightColumn { get; set; } - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should 
cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IRegressionOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(FastTreeTweedieRegressor)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new FastTreeTweedieRegressorPipelineStep(output); - } - - [Obsolete] - private class FastTreeTweedieRegressorPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public FastTreeTweedieRegressorPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Trainers - { - - /// - /// - [Obsolete] - public sealed partial class FieldAwareFactorizationMachineBinaryClassifier : Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Initial learning rate - /// - [TlcModule.SweepableFloatParamAttribute("LearningRate", 0.001f, 1f, isLogScale:true)] - [Obsolete] - public float LearningRate { get; set; } = 0.1f; - - /// - /// Number of training iterations - /// - [TlcModule.SweepableLongParamAttribute("Iters", 1, 100)] - [Obsolete] - public int Iters { get; set; } = 5; - - /// - /// Latent space dimension - /// - [TlcModule.SweepableLongParamAttribute("LatentDim", 4, 100)] - [Obsolete] - public int LatentDim { get; set; } = 20; - - /// - /// Regularization coefficient of linear weights - /// - [TlcModule.SweepableFloatParamAttribute("LambdaLinear", 1E-08f, 1f, isLogScale:true)] - [Obsolete] - public float LambdaLinear { get; set; } = 0.0001f; - - /// - /// Regularization coefficient of latent weights - /// - [TlcModule.SweepableFloatParamAttribute("LambdaLatent", 1E-08f, 1f, isLogScale:true)] - [Obsolete] - public float LambdaLatent { get; set; } = 0.0001f; - - /// - /// Whether to normalize the input vectors so that the concatenation of all fields' feature vectors is unit-length - /// - [Obsolete] - public bool Norm { get; set; } = true; - - /// - /// Whether to shuffle for each training iteration - /// - [Obsolete] - public bool Shuffle { get; set; } = true; - - /// - /// Report traning progress or not - /// - [Obsolete] - public bool Verbose { get; set; } = true; - - /// - /// Radius of initial latent factors - /// - [TlcModule.SweepableFloatParamAttribute("Radius", 0.1f, 1f)] - [Obsolete] - public float Radius { get; set; } = 0.5f; - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public 
Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IBinaryClassificationOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(FieldAwareFactorizationMachineBinaryClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new FieldAwareFactorizationMachineBinaryClassifierPipelineStep(output); - } - - [Obsolete] - private class FieldAwareFactorizationMachineBinaryClassifierPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public FieldAwareFactorizationMachineBinaryClassifierPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Trainers - { - - /// - /// Trains a gradient boosted stump per feature, on all features simultaneously, to fit target values using least-squares. It mantains no interactions between features. - /// - [Obsolete] - public sealed partial class GeneralizedAdditiveModelBinaryClassifier : Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Should we use derivatives optimized for unbalanced sets - /// - [Obsolete] - public bool UnbalancedSets { get; set; } = false; - - /// - /// The entropy (regularization) coefficient between 0 and 1 - /// - [Obsolete] - public double EntropyCoefficient { get; set; } - - /// - /// Tree fitting gain confidence requirement (should be in the range [0,1) ). - /// - [Obsolete] - public int GainConfidenceLevel { get; set; } - - /// - /// Total number of iterations over all features - /// - [TlcModule.SweepableDiscreteParamAttribute("NumIterations", new object[]{200, 1500, 9500})] - [Obsolete] - public int NumIterations { get; set; } = 9500; - - /// - /// The number of threads to use - /// - [Obsolete] - public int? NumThreads { get; set; } - - /// - /// The learning rate - /// - [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.001f, 0.1f, isLogScale:true)] - [Obsolete] - public double LearningRates { get; set; } = 0.002d; - - /// - /// Whether to utilize the disk or the data's native transposition facilities (where applicable) when performing the transpose - /// - [Obsolete] - public bool? 
DiskTranspose { get; set; } - - /// - /// Maximum number of distinct values (bins) per feature - /// - [Obsolete] - public int MaxBins { get; set; } = 255; - - /// - /// Upper bound on absolute value of single output - /// - [Obsolete] - public double MaxOutput { get; set; } = double.PositiveInfinity; - - /// - /// Sample each query 1 in k times in the GetDerivatives function - /// - [Obsolete] - public int GetDerivativesSampleRate { get; set; } = 1; - - /// - /// The seed of the random number generator - /// - [Obsolete] - public int RngSeed { get; set; } = 123; - - /// - /// Minimum number of training instances required to form a partition - /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocuments", new object[]{1, 10, 50})] - [Obsolete] - public int MinDocuments { get; set; } = 10; - - /// - /// Whether to collectivize features during dataset preparation to speed up training - /// - [Obsolete] - public bool FeatureFlocks { get; set; } = true; - - /// - /// Enable post-training pruning to avoid overfitting. (a validation set is required) - /// - [Obsolete] - public bool EnablePruning { get; set; } = true; - - /// - /// Column to use for example weight - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional WeightColumn { get; set; } - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IBinaryClassificationOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(GeneralizedAdditiveModelBinaryClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new GeneralizedAdditiveModelBinaryClassifierPipelineStep(output); - } - - [Obsolete] - private class GeneralizedAdditiveModelBinaryClassifierPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public GeneralizedAdditiveModelBinaryClassifierPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Trainers - { - - /// - /// Trains a gradient boosted stump per feature, on all features simultaneously, to fit target values using least-squares. It mantains no interactions between features. 
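Both GAM entry points (the binary classifier whose plumbing ends above and the regressor that starts here) train one boosted stump per feature, with no interactions between features. A minimal sketch of the replacement catalog calls, assuming the later stable names `Gam`, `numberOfIterations`, and `learningRate`:

```
// Sketch only: GAM trainers via the MLContext catalog.
using Microsoft.ML;

var mlContext = new MLContext();

var gamBinary = mlContext.BinaryClassification.Trainers.Gam(
    labelColumnName: "Label",
    featureColumnName: "Features",
    numberOfIterations: 9500,   // Legacy NumIterations
    learningRate: 0.002);       // Legacy LearningRates

// The regression variant takes the same shape:
var gamRegression = mlContext.Regression.Trainers.Gam(
    labelColumnName: "Label",
    featureColumnName: "Features");
```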
- /// - [Obsolete] - public sealed partial class GeneralizedAdditiveModelRegressor : Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Metric for pruning. (For regression, 1: L1, 2:L2; default L2) - /// - [Obsolete] - public int PruningMetrics { get; set; } = 2; - - /// - /// The entropy (regularization) coefficient between 0 and 1 - /// - [Obsolete] - public double EntropyCoefficient { get; set; } - - /// - /// Tree fitting gain confidence requirement (should be in the range [0,1) ). - /// - [Obsolete] - public int GainConfidenceLevel { get; set; } - - /// - /// Total number of iterations over all features - /// - [TlcModule.SweepableDiscreteParamAttribute("NumIterations", new object[]{200, 1500, 9500})] - [Obsolete] - public int NumIterations { get; set; } = 9500; - - /// - /// The number of threads to use - /// - [Obsolete] - public int? NumThreads { get; set; } - - /// - /// The learning rate - /// - [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.001f, 0.1f, isLogScale:true)] - [Obsolete] - public double LearningRates { get; set; } = 0.002d; - - /// - /// Whether to utilize the disk or the data's native transposition facilities (where applicable) when performing the transpose - /// - [Obsolete] - public bool? DiskTranspose { get; set; } - - /// - /// Maximum number of distinct values (bins) per feature - /// - [Obsolete] - public int MaxBins { get; set; } = 255; - - /// - /// Upper bound on absolute value of single output - /// - [Obsolete] - public double MaxOutput { get; set; } = double.PositiveInfinity; - - /// - /// Sample each query 1 in k times in the GetDerivatives function - /// - [Obsolete] - public int GetDerivativesSampleRate { get; set; } = 1; - - /// - /// The seed of the random number generator - /// - [Obsolete] - public int RngSeed { get; set; } = 123; - - /// - /// Minimum number of training instances required to form a partition - /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocuments", new object[]{1, 10, 50})] - [Obsolete] - public int MinDocuments { get; set; } = 10; - - /// - /// Whether to collectivize features during dataset preparation to speed up training - /// - [Obsolete] - public bool FeatureFlocks { get; set; } = true; - - /// - /// Enable post-training pruning to avoid overfitting. 
(a validation set is required) - /// - [Obsolete] - public bool EnablePruning { get; set; } = true; - - /// - /// Column to use for example weight - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional WeightColumn { get; set; } - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IRegressionOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(GeneralizedAdditiveModelRegressor)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new GeneralizedAdditiveModelRegressorPipelineStep(output); - } - - [Obsolete] - private class GeneralizedAdditiveModelRegressorPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public GeneralizedAdditiveModelRegressorPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Trainers - { - [Obsolete] - public enum KMeansPlusPlusTrainerInitAlgorithm - { - KMeansPlusPlus = 0, - Random = 1, - KMeansParallel = 2 - } - - - /// - /// - [Obsolete] - public sealed partial class KMeansPlusPlusClusterer : Microsoft.ML.EntryPoints.CommonInputs.IUnsupervisedTrainerWithWeight, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// The number of clusters - /// - [TlcModule.SweepableDiscreteParamAttribute("K", new object[]{5, 10, 20, 40})] - [Obsolete] - public int K { get; set; } = 5; - - /// - /// Cluster initialization algorithm - /// - [Obsolete] - public KMeansPlusPlusTrainerInitAlgorithm InitAlgorithm { get; set; } = KMeansPlusPlusTrainerInitAlgorithm.KMeansParallel; - - /// - /// Tolerance parameter for trainer convergence. Low = slower, more accurate - /// - [Obsolete] - public float OptTol { get; set; } = 1E-07f; - - /// - /// Maximum number of iterations. - /// - [Obsolete] - public int MaxIterations { get; set; } = 1000; - - /// - /// Memory budget (in MBs) to use for KMeans acceleration - /// - [Obsolete] - public int AccelMemBudgetMb { get; set; } = 4096; - - /// - /// Degree of lock-free parallelism. Defaults to automatic. Determinism not guaranteed. - /// - [Obsolete] - public int? 
NumThreads { get; set; } - - /// - /// Column to use for example weight - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional WeightColumn { get; set; } - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IClusteringOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(KMeansPlusPlusClusterer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new KMeansPlusPlusClustererPipelineStep(output); - } - - [Obsolete] - private class KMeansPlusPlusClustererPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public KMeansPlusPlusClustererPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Trainers - { - [Obsolete] - public enum LightGbmArgumentsEvalMetricType - { - DefaultMetric = 0, - Rmse = 1, - Mae = 2, - Logloss = 3, - Error = 4, - Merror = 5, - Mlogloss = 6, - Auc = 7, - Ndcg = 8, - Map = 9 - } - - - /// - /// - [Obsolete] - public sealed partial class LightGbmBinaryClassifier : Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithGroupId, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Number of iterations. - /// - [TlcModule.SweepableDiscreteParamAttribute("NumBoostRound", new object[]{10, 20, 50, 100, 150, 200})] - [Obsolete] - public int NumBoostRound { get; set; } = 100; - - /// - /// Shrinkage rate for trees, used to prevent over-fitting. Range: (0,1]. - /// - [TlcModule.SweepableFloatParamAttribute("LearningRate", 0.025f, 0.4f, isLogScale:true)] - [Obsolete] - public double? LearningRate { get; set; } - - /// - /// Maximum leaves for trees. - /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] - [Obsolete] - public int? NumLeaves { get; set; } - - /// - /// Minimum number of instances needed in a child. - /// - [TlcModule.SweepableDiscreteParamAttribute("MinDataPerLeaf", new object[]{1, 10, 20, 50})] - [Obsolete] - public int? MinDataPerLeaf { get; set; } - - /// - /// Max number of bucket bin for features. - /// - [Obsolete] - public int MaxBin { get; set; } = 255; - - /// - /// Which booster to use, can be gbtree, gblinear or dart. 
gbtree and dart use tree based model while gblinear uses linear function. - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public BoosterParameterFunction Booster { get; set; } = new GbdtBoosterParameterFunction(); - - /// - /// Verbose - /// - [Obsolete] - public bool VerboseEval { get; set; } = false; - - /// - /// Printing running messages. - /// - [Obsolete] - public bool Silent { get; set; } = true; - - /// - /// Number of parallel threads used to run LightGBM. - /// - [Obsolete] - public int? NThread { get; set; } - - /// - /// Evaluation metrics. - /// - [Obsolete] - public LightGbmArgumentsEvalMetricType EvalMetric { get; set; } = LightGbmArgumentsEvalMetricType.DefaultMetric; - - /// - /// Use softmax loss for the multi classification. - /// - [TlcModule.SweepableDiscreteParamAttribute("UseSoftmax", new object[]{true, false})] - [Obsolete] - public bool? UseSoftmax { get; set; } - - /// - /// Rounds of early stopping, 0 will disable it. - /// - [Obsolete] - public int EarlyStoppingRound { get; set; } - - /// - /// Comma seperated list of gains associated to each relevance label. - /// - [Obsolete] - public string CustomGains { get; set; } = "0,3,7,15,31,63,127,255,511,1023,2047,4095"; - - /// - /// Parameter for the sigmoid function. Used only in LightGbmBinaryTrainer, LightGbmMulticlassTrainer and in LightGbmRankingTrainer. - /// - [Obsolete] - public double Sigmoid { get; set; } = 0.5d; - - /// - /// Number of entries in a batch when loading data. - /// - [Obsolete] - public int BatchSize { get; set; } = 1048576; - - /// - /// Enable categorical split or not. - /// - [TlcModule.SweepableDiscreteParamAttribute("UseCat", new object[]{true, false})] - [Obsolete] - public bool? UseCat { get; set; } - - /// - /// Enable missing value auto infer or not. - /// - [TlcModule.SweepableDiscreteParamAttribute("UseMissing", new object[]{true, false})] - [Obsolete] - public bool UseMissing { get; set; } = false; - - /// - /// Min number of instances per categorical group. - /// - [TlcModule.Range(Inf = 0, Max = 2147483647)] - [TlcModule.SweepableDiscreteParamAttribute("MinDataPerGroup", new object[]{10, 50, 100, 200})] - [Obsolete] - public int MinDataPerGroup { get; set; } = 100; - - /// - /// Max number of categorical thresholds. - /// - [TlcModule.Range(Inf = 0, Max = 2147483647)] - [TlcModule.SweepableDiscreteParamAttribute("MaxCatThreshold", new object[]{8, 16, 32, 64})] - [Obsolete] - public int MaxCatThreshold { get; set; } = 32; - - /// - /// Lapalace smooth term in categorical feature spilt. Avoid the bias of small categories. - /// - [TlcModule.Range(Min = 0d)] - [TlcModule.SweepableDiscreteParamAttribute("CatSmooth", new object[]{1, 10, 20})] - [Obsolete] - public double CatSmooth { get; set; } = 10d; - - /// - /// L2 Regularization for categorical split. 
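The Legacy `LightGbmBinaryClassifier` options above (boosting rounds, leaves, minimum data per leaf, learning rate, categorical handling) map onto the LightGBM trainer in the replacement catalog. A sketch with 1.x-style names assumed; the booster and categorical settings move to the trainer's Options type rather than the simple overload shown here.

```
// Sketch only: LightGBM binary classification via the MLContext catalog.
using Microsoft.ML;

var mlContext = new MLContext();

var lightGbm = mlContext.BinaryClassification.Trainers.LightGbm(
    labelColumnName: "Label",
    featureColumnName: "Features",
    numberOfLeaves: 20,             // Legacy NumLeaves (unset by default)
    minimumExampleCountPerLeaf: 10, // Legacy MinDataPerLeaf (unset by default)
    learningRate: 0.2,              // Legacy LearningRate (unset by default)
    numberOfIterations: 100);       // Legacy NumBoostRound (default 100)
```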
- /// - [TlcModule.Range(Min = 0d)] - [TlcModule.SweepableDiscreteParamAttribute("CatL2", new object[]{0.1f, 0.5f, 1, 5, 10})] - [Obsolete] - public double CatL2 { get; set; } = 10d; - - /// - /// Parallel LightGBM Learning Algorithm - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public ParallelLightGBM ParallelTrainer { get; set; } = new SingleParallelLightGBM(); - - /// - /// Column to use for example groupId - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional GroupIdColumn { get; set; } - - /// - /// Column to use for example weight - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional WeightColumn { get; set; } - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IBinaryClassificationOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(LightGbmBinaryClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new LightGbmBinaryClassifierPipelineStep(output); - } - - [Obsolete] - private class LightGbmBinaryClassifierPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public LightGbmBinaryClassifierPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Trainers - { - - /// - /// - [Obsolete] - public sealed partial class LightGbmClassifier : Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithGroupId, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Number of iterations. - /// - [TlcModule.SweepableDiscreteParamAttribute("NumBoostRound", new object[]{10, 20, 50, 100, 150, 200})] - [Obsolete] - public int NumBoostRound { get; set; } = 100; - - /// - /// Shrinkage rate for trees, used to prevent over-fitting. Range: (0,1]. - /// - [TlcModule.SweepableFloatParamAttribute("LearningRate", 0.025f, 0.4f, isLogScale:true)] - [Obsolete] - public double? LearningRate { get; set; } - - /// - /// Maximum leaves for trees. - /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] - [Obsolete] - public int? 
NumLeaves { get; set; } - - /// - /// Minimum number of instances needed in a child. - /// - [TlcModule.SweepableDiscreteParamAttribute("MinDataPerLeaf", new object[]{1, 10, 20, 50})] - [Obsolete] - public int? MinDataPerLeaf { get; set; } - - /// - /// Max number of bucket bin for features. - /// - [Obsolete] - public int MaxBin { get; set; } = 255; - - /// - /// Which booster to use, can be gbtree, gblinear or dart. gbtree and dart use tree based model while gblinear uses linear function. - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public BoosterParameterFunction Booster { get; set; } = new GbdtBoosterParameterFunction(); - - /// - /// Verbose - /// - [Obsolete] - public bool VerboseEval { get; set; } = false; - - /// - /// Printing running messages. - /// - [Obsolete] - public bool Silent { get; set; } = true; - - /// - /// Number of parallel threads used to run LightGBM. - /// - [Obsolete] - public int? NThread { get; set; } - - /// - /// Evaluation metrics. - /// - [Obsolete] - public LightGbmArgumentsEvalMetricType EvalMetric { get; set; } = LightGbmArgumentsEvalMetricType.DefaultMetric; - - /// - /// Use softmax loss for the multi classification. - /// - [TlcModule.SweepableDiscreteParamAttribute("UseSoftmax", new object[]{true, false})] - [Obsolete] - public bool? UseSoftmax { get; set; } - - /// - /// Rounds of early stopping, 0 will disable it. - /// - [Obsolete] - public int EarlyStoppingRound { get; set; } - - /// - /// Comma seperated list of gains associated to each relevance label. - /// - [Obsolete] - public string CustomGains { get; set; } = "0,3,7,15,31,63,127,255,511,1023,2047,4095"; - - /// - /// Parameter for the sigmoid function. Used only in LightGbmBinaryTrainer, LightGbmMulticlassTrainer and in LightGbmRankingTrainer. - /// - [Obsolete] - public double Sigmoid { get; set; } = 0.5d; - - /// - /// Number of entries in a batch when loading data. - /// - [Obsolete] - public int BatchSize { get; set; } = 1048576; - - /// - /// Enable categorical split or not. - /// - [TlcModule.SweepableDiscreteParamAttribute("UseCat", new object[]{true, false})] - [Obsolete] - public bool? UseCat { get; set; } - - /// - /// Enable missing value auto infer or not. - /// - [TlcModule.SweepableDiscreteParamAttribute("UseMissing", new object[]{true, false})] - [Obsolete] - public bool UseMissing { get; set; } = false; - - /// - /// Min number of instances per categorical group. - /// - [TlcModule.Range(Inf = 0, Max = 2147483647)] - [TlcModule.SweepableDiscreteParamAttribute("MinDataPerGroup", new object[]{10, 50, 100, 200})] - [Obsolete] - public int MinDataPerGroup { get; set; } = 100; - - /// - /// Max number of categorical thresholds. - /// - [TlcModule.Range(Inf = 0, Max = 2147483647)] - [TlcModule.SweepableDiscreteParamAttribute("MaxCatThreshold", new object[]{8, 16, 32, 64})] - [Obsolete] - public int MaxCatThreshold { get; set; } = 32; - - /// - /// Lapalace smooth term in categorical feature spilt. Avoid the bias of small categories. - /// - [TlcModule.Range(Min = 0d)] - [TlcModule.SweepableDiscreteParamAttribute("CatSmooth", new object[]{1, 10, 20})] - [Obsolete] - public double CatSmooth { get; set; } = 10d; - - /// - /// L2 Regularization for categorical split. 
- /// - [TlcModule.Range(Min = 0d)] - [TlcModule.SweepableDiscreteParamAttribute("CatL2", new object[]{0.1f, 0.5f, 1, 5, 10})] - [Obsolete] - public double CatL2 { get; set; } = 10d; - - /// - /// Parallel LightGBM Learning Algorithm - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public ParallelLightGBM ParallelTrainer { get; set; } = new SingleParallelLightGBM(); - - /// - /// Column to use for example groupId - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional GroupIdColumn { get; set; } - - /// - /// Column to use for example weight - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional WeightColumn { get; set; } - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IMulticlassClassificationOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(LightGbmClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new LightGbmClassifierPipelineStep(output); - } - - [Obsolete] - private class LightGbmClassifierPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public LightGbmClassifierPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Trainers - { - - /// - /// - [Obsolete] - public sealed partial class LightGbmRanker : Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithGroupId, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Number of iterations. - /// - [TlcModule.SweepableDiscreteParamAttribute("NumBoostRound", new object[]{10, 20, 50, 100, 150, 200})] - [Obsolete] - public int NumBoostRound { get; set; } = 100; - - /// - /// Shrinkage rate for trees, used to prevent over-fitting. Range: (0,1]. - /// - [TlcModule.SweepableFloatParamAttribute("LearningRate", 0.025f, 0.4f, isLogScale:true)] - [Obsolete] - public double? LearningRate { get; set; } - - /// - /// Maximum leaves for trees. - /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] - [Obsolete] - public int? 
NumLeaves { get; set; } - - /// - /// Minimum number of instances needed in a child. - /// - [TlcModule.SweepableDiscreteParamAttribute("MinDataPerLeaf", new object[]{1, 10, 20, 50})] - [Obsolete] - public int? MinDataPerLeaf { get; set; } - - /// - /// Max number of bucket bin for features. - /// - [Obsolete] - public int MaxBin { get; set; } = 255; - - /// - /// Which booster to use, can be gbtree, gblinear or dart. gbtree and dart use tree based model while gblinear uses linear function. - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public BoosterParameterFunction Booster { get; set; } = new GbdtBoosterParameterFunction(); - - /// - /// Verbose - /// - [Obsolete] - public bool VerboseEval { get; set; } = false; - - /// - /// Printing running messages. - /// - [Obsolete] - public bool Silent { get; set; } = true; - - /// - /// Number of parallel threads used to run LightGBM. - /// - [Obsolete] - public int? NThread { get; set; } - - /// - /// Evaluation metrics. - /// - [Obsolete] - public LightGbmArgumentsEvalMetricType EvalMetric { get; set; } = LightGbmArgumentsEvalMetricType.DefaultMetric; - - /// - /// Use softmax loss for the multi classification. - /// - [TlcModule.SweepableDiscreteParamAttribute("UseSoftmax", new object[]{true, false})] - [Obsolete] - public bool? UseSoftmax { get; set; } - - /// - /// Rounds of early stopping, 0 will disable it. - /// - [Obsolete] - public int EarlyStoppingRound { get; set; } - - /// - /// Comma seperated list of gains associated to each relevance label. - /// - [Obsolete] - public string CustomGains { get; set; } = "0,3,7,15,31,63,127,255,511,1023,2047,4095"; - - /// - /// Parameter for the sigmoid function. Used only in LightGbmBinaryTrainer, LightGbmMulticlassTrainer and in LightGbmRankingTrainer. - /// - [Obsolete] - public double Sigmoid { get; set; } = 0.5d; - - /// - /// Number of entries in a batch when loading data. - /// - [Obsolete] - public int BatchSize { get; set; } = 1048576; - - /// - /// Enable categorical split or not. - /// - [TlcModule.SweepableDiscreteParamAttribute("UseCat", new object[]{true, false})] - [Obsolete] - public bool? UseCat { get; set; } - - /// - /// Enable missing value auto infer or not. - /// - [TlcModule.SweepableDiscreteParamAttribute("UseMissing", new object[]{true, false})] - [Obsolete] - public bool UseMissing { get; set; } = false; - - /// - /// Min number of instances per categorical group. - /// - [TlcModule.Range(Inf = 0, Max = 2147483647)] - [TlcModule.SweepableDiscreteParamAttribute("MinDataPerGroup", new object[]{10, 50, 100, 200})] - [Obsolete] - public int MinDataPerGroup { get; set; } = 100; - - /// - /// Max number of categorical thresholds. - /// - [TlcModule.Range(Inf = 0, Max = 2147483647)] - [TlcModule.SweepableDiscreteParamAttribute("MaxCatThreshold", new object[]{8, 16, 32, 64})] - [Obsolete] - public int MaxCatThreshold { get; set; } = 32; - - /// - /// Lapalace smooth term in categorical feature spilt. Avoid the bias of small categories. - /// - [TlcModule.Range(Min = 0d)] - [TlcModule.SweepableDiscreteParamAttribute("CatSmooth", new object[]{1, 10, 20})] - [Obsolete] - public double CatSmooth { get; set; } = 10d; - - /// - /// L2 Regularization for categorical split. 
- /// - [TlcModule.Range(Min = 0d)] - [TlcModule.SweepableDiscreteParamAttribute("CatL2", new object[]{0.1f, 0.5f, 1, 5, 10})] - [Obsolete] - public double CatL2 { get; set; } = 10d; - - /// - /// Parallel LightGBM Learning Algorithm - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public ParallelLightGBM ParallelTrainer { get; set; } = new SingleParallelLightGBM(); - - /// - /// Column to use for example groupId - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional GroupIdColumn { get; set; } - - /// - /// Column to use for example weight - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional WeightColumn { get; set; } - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IRankingOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(LightGbmRanker)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new LightGbmRankerPipelineStep(output); - } - - [Obsolete] - private class LightGbmRankerPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public LightGbmRankerPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Trainers - { - - /// - /// - [Obsolete] - public sealed partial class LightGbmRegressor : Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithGroupId, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Number of iterations. - /// - [TlcModule.SweepableDiscreteParamAttribute("NumBoostRound", new object[]{10, 20, 50, 100, 150, 200})] - [Obsolete] - public int NumBoostRound { get; set; } = 100; - - /// - /// Shrinkage rate for trees, used to prevent over-fitting. Range: (0,1]. - /// - [TlcModule.SweepableFloatParamAttribute("LearningRate", 0.025f, 0.4f, isLogScale:true)] - [Obsolete] - public double? LearningRate { get; set; } - - /// - /// Maximum leaves for trees. - /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] - [Obsolete] - public int? 
NumLeaves { get; set; } - - /// - /// Minimum number of instances needed in a child. - /// - [TlcModule.SweepableDiscreteParamAttribute("MinDataPerLeaf", new object[]{1, 10, 20, 50})] - [Obsolete] - public int? MinDataPerLeaf { get; set; } - - /// - /// Max number of bucket bin for features. - /// - [Obsolete] - public int MaxBin { get; set; } = 255; - - /// - /// Which booster to use, can be gbtree, gblinear or dart. gbtree and dart use tree based model while gblinear uses linear function. - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public BoosterParameterFunction Booster { get; set; } = new GbdtBoosterParameterFunction(); - - /// - /// Verbose - /// - [Obsolete] - public bool VerboseEval { get; set; } = false; - - /// - /// Printing running messages. - /// - [Obsolete] - public bool Silent { get; set; } = true; - - /// - /// Number of parallel threads used to run LightGBM. - /// - [Obsolete] - public int? NThread { get; set; } - - /// - /// Evaluation metrics. - /// - [Obsolete] - public LightGbmArgumentsEvalMetricType EvalMetric { get; set; } = LightGbmArgumentsEvalMetricType.DefaultMetric; - - /// - /// Use softmax loss for the multi classification. - /// - [TlcModule.SweepableDiscreteParamAttribute("UseSoftmax", new object[]{true, false})] - [Obsolete] - public bool? UseSoftmax { get; set; } - - /// - /// Rounds of early stopping, 0 will disable it. - /// - [Obsolete] - public int EarlyStoppingRound { get; set; } - - /// - /// Comma seperated list of gains associated to each relevance label. - /// - [Obsolete] - public string CustomGains { get; set; } = "0,3,7,15,31,63,127,255,511,1023,2047,4095"; - - /// - /// Parameter for the sigmoid function. Used only in LightGbmBinaryTrainer, LightGbmMulticlassTrainer and in LightGbmRankingTrainer. - /// - [Obsolete] - public double Sigmoid { get; set; } = 0.5d; - - /// - /// Number of entries in a batch when loading data. - /// - [Obsolete] - public int BatchSize { get; set; } = 1048576; - - /// - /// Enable categorical split or not. - /// - [TlcModule.SweepableDiscreteParamAttribute("UseCat", new object[]{true, false})] - [Obsolete] - public bool? UseCat { get; set; } - - /// - /// Enable missing value auto infer or not. - /// - [TlcModule.SweepableDiscreteParamAttribute("UseMissing", new object[]{true, false})] - [Obsolete] - public bool UseMissing { get; set; } = false; - - /// - /// Min number of instances per categorical group. - /// - [TlcModule.Range(Inf = 0, Max = 2147483647)] - [TlcModule.SweepableDiscreteParamAttribute("MinDataPerGroup", new object[]{10, 50, 100, 200})] - [Obsolete] - public int MinDataPerGroup { get; set; } = 100; - - /// - /// Max number of categorical thresholds. - /// - [TlcModule.Range(Inf = 0, Max = 2147483647)] - [TlcModule.SweepableDiscreteParamAttribute("MaxCatThreshold", new object[]{8, 16, 32, 64})] - [Obsolete] - public int MaxCatThreshold { get; set; } = 32; - - /// - /// Lapalace smooth term in categorical feature spilt. Avoid the bias of small categories. - /// - [TlcModule.Range(Min = 0d)] - [TlcModule.SweepableDiscreteParamAttribute("CatSmooth", new object[]{1, 10, 20})] - [Obsolete] - public double CatSmooth { get; set; } = 10d; - - /// - /// L2 Regularization for categorical split. 
- /// - [TlcModule.Range(Min = 0d)] - [TlcModule.SweepableDiscreteParamAttribute("CatL2", new object[]{0.1f, 0.5f, 1, 5, 10})] - [Obsolete] - public double CatL2 { get; set; } = 10d; - - /// - /// Parallel LightGBM Learning Algorithm - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public ParallelLightGBM ParallelTrainer { get; set; } = new SingleParallelLightGBM(); - - /// - /// Column to use for example groupId - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional GroupIdColumn { get; set; } - - /// - /// Column to use for example weight - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional WeightColumn { get; set; } - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IRegressionOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(LightGbmRegressor)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new LightGbmRegressorPipelineStep(output); - } - - [Obsolete] - private class LightGbmRegressorPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public LightGbmRegressorPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Trainers - { - - /// - /// Train a linear SVM. - /// - [Obsolete] - public sealed partial class LinearSvmBinaryClassifier : Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Regularizer constant - /// - [TlcModule.SweepableFloatParamAttribute("Lambda", 1E-05f, 0.1f, stepSize:10, isLogScale:true)] - [Obsolete] - public float Lambda { get; set; } = 0.001f; - - /// - /// Batch size - /// - [Obsolete] - public int BatchSize { get; set; } = 1; - - /// - /// Perform projection to unit-ball? Typically used with batch size > 1. 
- /// - [TlcModule.SweepableDiscreteParamAttribute("PerformProjection", new object[]{false, true})] - [Obsolete] - public bool PerformProjection { get; set; } = false; - - /// - /// No bias - /// - [TlcModule.SweepableDiscreteParamAttribute("NoBias", new object[]{false, true})] - [Obsolete] - public bool NoBias { get; set; } = false; - - /// - /// The calibrator kind to apply to the predictor. Specify null for no calibration - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public CalibratorTrainer Calibrator { get; set; } = new PlattCalibratorCalibratorTrainer(); - - /// - /// The maximum number of examples to use when training the calibrator - /// - [Obsolete] - public int MaxCalibrationExamples { get; set; } = 1000000; - - /// - /// Number of iterations - /// - [TlcModule.SweepableLongParamAttribute("NumIterations", 1, 100, stepSize:10, isLogScale:true)] - [Obsolete] - public int NumIterations { get; set; } = 1; - - /// - /// Initial Weights and bias, comma-separated - /// - [Obsolete] - public string InitialWeights { get; set; } - - /// - /// Init weights diameter - /// - [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0f, 1f, numSteps:5)] - [Obsolete] - public float InitWtsDiameter { get; set; } - - /// - /// Whether to shuffle for each training iteration - /// - [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[]{false, true})] - [Obsolete] - public bool Shuffle { get; set; } = true; - - /// - /// Size of cache when trained in Scope - /// - [Obsolete] - public int StreamingCacheSize { get; set; } = 1000000; - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IBinaryClassificationOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(LinearSvmBinaryClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new LinearSvmBinaryClassifierPipelineStep(output); - } - - [Obsolete] - private class LinearSvmBinaryClassifierPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public LinearSvmBinaryClassifierPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Trainers - { - - /// - /// - [Obsolete] - public sealed partial class 
LogisticRegressionBinaryClassifier : Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Show statistics of training examples. - /// - [Obsolete] - public bool ShowTrainingStats { get; set; } = false; - - /// - /// L2 regularization weight - /// - [TlcModule.SweepableFloatParamAttribute("L2Weight", 0f, 1f, numSteps:4)] - [Obsolete] - public float L2Weight { get; set; } = 1f; - - /// - /// L1 regularization weight - /// - [TlcModule.SweepableFloatParamAttribute("L1Weight", 0f, 1f, numSteps:4)] - [Obsolete] - public float L1Weight { get; set; } = 1f; - - /// - /// Tolerance parameter for optimization convergence. Low = slower, more accurate - /// - [TlcModule.SweepableDiscreteParamAttribute("OptTol", new object[]{0.0001f, 1E-07f})] - [Obsolete] - public float OptTol { get; set; } = 1E-07f; - - /// - /// Memory size for L-BFGS. Low=faster, less accurate - /// - [TlcModule.SweepableDiscreteParamAttribute("MemorySize", new object[]{5, 20, 50})] - [Obsolete] - public int MemorySize { get; set; } = 20; - - /// - /// Maximum iterations. - /// - [TlcModule.SweepableLongParamAttribute("MaxIterations", 1, 2147483647)] - [Obsolete] - public int MaxIterations { get; set; } = 2147483647; - - /// - /// Run SGD to initialize LR weights, converging to this tolerance - /// - [Obsolete] - public float SgdInitializationTolerance { get; set; } - - /// - /// If set to true, produce no output during training. - /// - [Obsolete] - public bool Quiet { get; set; } = false; - - /// - /// Init weights diameter - /// - [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0f, 1f, numSteps:5)] - [Obsolete] - public float InitWtsDiameter { get; set; } - - /// - /// Whether or not to use threads. Default is true - /// - [Obsolete] - public bool UseThreads { get; set; } = true; - - /// - /// Number of threads - /// - [Obsolete] - public int? 
NumThreads { get; set; } - - /// - /// Force densification of the internal optimization vectors - /// - [TlcModule.SweepableDiscreteParamAttribute("DenseOptimizer", new object[]{false, true})] - [Obsolete] - public bool DenseOptimizer { get; set; } = false; - - /// - /// Enforce non-negative weights - /// - [Obsolete] - public bool EnforceNonNegativity { get; set; } = false; - - /// - /// Column to use for example weight - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional WeightColumn { get; set; } - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IBinaryClassificationOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(LogisticRegressionBinaryClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new LogisticRegressionBinaryClassifierPipelineStep(output); - } - - [Obsolete] - private class LogisticRegressionBinaryClassifierPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public LogisticRegressionBinaryClassifierPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Trainers - { - - /// - /// - [Obsolete] - public sealed partial class LogisticRegressionClassifier : Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Show statistics of training examples. - /// - [Obsolete] - public bool ShowTrainingStats { get; set; } = false; - - /// - /// L2 regularization weight - /// - [TlcModule.SweepableFloatParamAttribute("L2Weight", 0f, 1f, numSteps:4)] - [Obsolete] - public float L2Weight { get; set; } = 1f; - - /// - /// L1 regularization weight - /// - [TlcModule.SweepableFloatParamAttribute("L1Weight", 0f, 1f, numSteps:4)] - [Obsolete] - public float L1Weight { get; set; } = 1f; - - /// - /// Tolerance parameter for optimization convergence. Low = slower, more accurate - /// - [TlcModule.SweepableDiscreteParamAttribute("OptTol", new object[]{0.0001f, 1E-07f})] - [Obsolete] - public float OptTol { get; set; } = 1E-07f; - - /// - /// Memory size for L-BFGS. 
Low=faster, less accurate - /// - [TlcModule.SweepableDiscreteParamAttribute("MemorySize", new object[]{5, 20, 50})] - [Obsolete] - public int MemorySize { get; set; } = 20; - - /// - /// Maximum iterations. - /// - [TlcModule.SweepableLongParamAttribute("MaxIterations", 1, 2147483647)] - [Obsolete] - public int MaxIterations { get; set; } = 2147483647; - - /// - /// Run SGD to initialize LR weights, converging to this tolerance - /// - [Obsolete] - public float SgdInitializationTolerance { get; set; } - - /// - /// If set to true, produce no output during training. - /// - [Obsolete] - public bool Quiet { get; set; } = false; - - /// - /// Init weights diameter - /// - [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0f, 1f, numSteps:5)] - [Obsolete] - public float InitWtsDiameter { get; set; } - - /// - /// Whether or not to use threads. Default is true - /// - [Obsolete] - public bool UseThreads { get; set; } = true; - - /// - /// Number of threads - /// - [Obsolete] - public int? NumThreads { get; set; } - - /// - /// Force densification of the internal optimization vectors - /// - [TlcModule.SweepableDiscreteParamAttribute("DenseOptimizer", new object[]{false, true})] - [Obsolete] - public bool DenseOptimizer { get; set; } = false; - - /// - /// Enforce non-negative weights - /// - [Obsolete] - public bool EnforceNonNegativity { get; set; } = false; - - /// - /// Column to use for example weight - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional WeightColumn { get; set; } - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IMulticlassClassificationOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(LogisticRegressionClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new LogisticRegressionClassifierPipelineStep(output); - } - - [Obsolete] - private class LogisticRegressionClassifierPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public LogisticRegressionClassifierPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Trainers - { - - /// - /// - [Obsolete] - public sealed partial class NaiveBayesClassifier : 
Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IMulticlassClassificationOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(NaiveBayesClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new NaiveBayesClassifierPipelineStep(output); - } - - [Obsolete] - private class NaiveBayesClassifierPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public NaiveBayesClassifierPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Trainers - { - - /// - /// - [Obsolete] - public sealed partial class OnlineGradientDescentRegressor : Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Loss Function - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public RegressionLossFunction LossFunction { get; set; } = new SquaredLossRegressionLossFunction(); - - /// - /// Learning rate - /// - [TlcModule.SweepableDiscreteParamAttribute("LearningRate", new object[]{0.01f, 0.1f, 0.5f, 1f})] - [Obsolete] - public float LearningRate { get; set; } = 0.1f; - - /// - /// Decrease learning rate - /// - [TlcModule.SweepableDiscreteParamAttribute("DecreaseLearningRate", new object[]{false, true})] - [Obsolete] - public bool DecreaseLearningRate { get; set; } = true; - - /// - /// Number of examples after which weights will be reset to the current average - /// - [Obsolete] - public long? 
ResetWeightsAfterXExamples { get; set; } - - /// - /// Instead of updating averaged weights on every example, only update when loss is nonzero - /// - [Obsolete] - public bool DoLazyUpdates { get; set; } = true; - - /// - /// L2 Regularization Weight - /// - [TlcModule.SweepableFloatParamAttribute("L2RegularizerWeight", 0f, 0.4f)] - [Obsolete] - public float L2RegularizerWeight { get; set; } - - /// - /// Extra weight given to more recent updates - /// - [Obsolete] - public float RecencyGain { get; set; } - - /// - /// Whether Recency Gain is multiplicative (vs. additive) - /// - [Obsolete] - public bool RecencyGainMulti { get; set; } = false; - - /// - /// Do averaging? - /// - [Obsolete] - public bool Averaged { get; set; } = true; - - /// - /// The inexactness tolerance for averaging - /// - [Obsolete] - public float AveragedTolerance { get; set; } = 0.01f; - - /// - /// Number of iterations - /// - [TlcModule.SweepableLongParamAttribute("NumIterations", 1, 100, stepSize:10, isLogScale:true)] - [Obsolete] - public int NumIterations { get; set; } = 1; - - /// - /// Initial Weights and bias, comma-separated - /// - [Obsolete] - public string InitialWeights { get; set; } - - /// - /// Init weights diameter - /// - [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0f, 1f, numSteps:5)] - [Obsolete] - public float InitWtsDiameter { get; set; } - - /// - /// Whether to shuffle for each training iteration - /// - [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[]{false, true})] - [Obsolete] - public bool Shuffle { get; set; } = true; - - /// - /// Size of cache when trained in Scope - /// - [Obsolete] - public int StreamingCacheSize { get; set; } = 1000000; - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IRegressionOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(OnlineGradientDescentRegressor)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new OnlineGradientDescentRegressorPipelineStep(output); - } - - [Obsolete] - private class OnlineGradientDescentRegressorPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public OnlineGradientDescentRegressorPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - 
public Var Model { get; } - } - } - } - - namespace Legacy.Trainers - { - - /// - [Obsolete] - public sealed partial class OrdinaryLeastSquaresRegressor : Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// L2 regularization weight - /// - [TlcModule.SweepableDiscreteParamAttribute("L2Weight", new object[]{1E-06f, 0.1f, 1f})] - [Obsolete] - public float L2Weight { get; set; } = 1E-06f; - - /// - /// Whether to calculate per parameter significance statistics - /// - [Obsolete] - public bool PerParameterSignificance { get; set; } = true; - - /// - /// Column to use for example weight - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional WeightColumn { get; set; } - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IRegressionOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(OrdinaryLeastSquaresRegressor)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new OrdinaryLeastSquaresRegressorPipelineStep(output); - } - - [Obsolete] - private class OrdinaryLeastSquaresRegressorPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public OrdinaryLeastSquaresRegressorPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Trainers - { - - /// - /// - [Obsolete] - public sealed partial class PcaAnomalyDetector : Microsoft.ML.EntryPoints.CommonInputs.IUnsupervisedTrainerWithWeight, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// The number of components in the PCA - /// - [TlcModule.SweepableDiscreteParamAttribute("Rank", new object[]{10, 20, 40, 80})] - [Obsolete] - public int Rank { get; set; } = 20; - - /// - /// Oversampling parameter for randomized PCA training - /// - [TlcModule.SweepableDiscreteParamAttribute("Oversampling", new object[]{10, 20, 40})] - [Obsolete] - public int Oversampling { get; set; } = 20; - - /// - /// If enabled, data is centered to be zero mean - /// - 
[TlcModule.SweepableDiscreteParamAttribute("Center", new object[]{false, true})] - [Obsolete] - public bool Center { get; set; } = true; - - /// - /// The seed for random number generation - /// - [Obsolete] - public int? Seed { get; set; } - - /// - /// Column to use for example weight - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional WeightColumn { get; set; } - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IAnomalyDetectionOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(PcaAnomalyDetector)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new PcaAnomalyDetectorPipelineStep(output); - } - - [Obsolete] - private class PcaAnomalyDetectorPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public PcaAnomalyDetectorPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Trainers - { - - /// - /// - [Obsolete] - public sealed partial class PoissonRegressor : Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// L2 regularization weight - /// - [TlcModule.SweepableFloatParamAttribute("L2Weight", 0f, 1f, numSteps:4)] - [Obsolete] - public float L2Weight { get; set; } = 1f; - - /// - /// L1 regularization weight - /// - [TlcModule.SweepableFloatParamAttribute("L1Weight", 0f, 1f, numSteps:4)] - [Obsolete] - public float L1Weight { get; set; } = 1f; - - /// - /// Tolerance parameter for optimization convergence. Low = slower, more accurate - /// - [TlcModule.SweepableDiscreteParamAttribute("OptTol", new object[]{0.0001f, 1E-07f})] - [Obsolete] - public float OptTol { get; set; } = 1E-07f; - - /// - /// Memory size for L-BFGS. Low=faster, less accurate - /// - [TlcModule.SweepableDiscreteParamAttribute("MemorySize", new object[]{5, 20, 50})] - [Obsolete] - public int MemorySize { get; set; } = 20; - - /// - /// Maximum iterations. 
- /// - [TlcModule.SweepableLongParamAttribute("MaxIterations", 1, 2147483647)] - [Obsolete] - public int MaxIterations { get; set; } = 2147483647; - - /// - /// Run SGD to initialize LR weights, converging to this tolerance - /// - [Obsolete] - public float SgdInitializationTolerance { get; set; } - - /// - /// If set to true, produce no output during training. - /// - [Obsolete] - public bool Quiet { get; set; } = false; - - /// - /// Init weights diameter - /// - [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0f, 1f, numSteps:5)] - [Obsolete] - public float InitWtsDiameter { get; set; } - - /// - /// Whether or not to use threads. Default is true - /// - [Obsolete] - public bool UseThreads { get; set; } = true; - - /// - /// Number of threads - /// - [Obsolete] - public int? NumThreads { get; set; } - - /// - /// Force densification of the internal optimization vectors - /// - [TlcModule.SweepableDiscreteParamAttribute("DenseOptimizer", new object[]{false, true})] - [Obsolete] - public bool DenseOptimizer { get; set; } = false; - - /// - /// Enforce non-negative weights - /// - [Obsolete] - public bool EnforceNonNegativity { get; set; } = false; - - /// - /// Column to use for example weight - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional WeightColumn { get; set; } - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IRegressionOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(PoissonRegressor)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new PoissonRegressorPipelineStep(output); - } - - [Obsolete] - private class PoissonRegressorPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public PoissonRegressorPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Trainers - { - - /// - /// - [Obsolete] - public sealed partial class StochasticDualCoordinateAscentBinaryClassifier : Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Loss Function - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public 
SDCAClassificationLossFunction LossFunction { get; set; } = new LogLossSDCAClassificationLossFunction(); - - /// - /// Apply weight to the positive class, for imbalanced data - /// - [Obsolete] - public float PositiveInstanceWeight { get; set; } = 1f; - - /// - /// The calibrator kind to apply to the predictor. Specify null for no calibration - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public CalibratorTrainer Calibrator { get; set; } = new PlattCalibratorCalibratorTrainer(); - - /// - /// The maximum number of examples to use when training the calibrator - /// - [Obsolete] - public int MaxCalibrationExamples { get; set; } = 1000000; - - /// - /// L2 regularizer constant. By default the l2 constant is automatically inferred based on data set. - /// - [TlcModule.SweepableDiscreteParamAttribute("L2Const", new object[]{"", 1E-07f, 1E-06f, 1E-05f, 0.0001f, 0.001f, 0.01f})] - [Obsolete] - public float? L2Const { get; set; } - - /// - /// L1 soft threshold (L1/L2). Note that it is easier to control and sweep using the threshold parameter than the raw L1-regularizer constant. By default the l1 threshold is automatically inferred based on data set. - /// - [TlcModule.SweepableDiscreteParamAttribute("L1Threshold", new object[]{"", 0f, 0.25f, 0.5f, 0.75f, 1f})] - [Obsolete] - public float? L1Threshold { get; set; } - - /// - /// Degree of lock-free parallelism. Defaults to automatic. Determinism not guaranteed. - /// - [Obsolete] - public int? NumThreads { get; set; } - - /// - /// The tolerance for the ratio between duality gap and primal loss for convergence checking. - /// - [TlcModule.SweepableDiscreteParamAttribute("ConvergenceTolerance", new object[]{0.001f, 0.01f, 0.1f, 0.2f})] - [Obsolete] - public float ConvergenceTolerance { get; set; } = 0.1f; - - /// - /// Maximum number of iterations; set to 1 to simulate online learning. Defaults to automatic. - /// - [TlcModule.SweepableDiscreteParamAttribute("MaxIterations", new object[]{"", 10, 20, 100})] - [Obsolete] - public int? MaxIterations { get; set; } - - /// - /// Shuffle data every epoch? - /// - [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[]{false, true})] - [Obsolete] - public bool Shuffle { get; set; } = true; - - /// - /// Convergence check frequency (in terms of number of iterations). Set as negative or zero for not checking at all. If left blank, it defaults to check after every 'numThreads' iterations. - /// - [Obsolete] - public int? CheckFrequency { get; set; } - - /// - /// The learning rate for adjusting bias from being regularized. 
- /// - [TlcModule.SweepableDiscreteParamAttribute("BiasLearningRate", new object[]{0f, 0.01f, 0.1f, 1f})] - [Obsolete] - public float BiasLearningRate { get; set; } - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IBinaryClassificationOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(StochasticDualCoordinateAscentBinaryClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new StochasticDualCoordinateAscentBinaryClassifierPipelineStep(output); - } - - [Obsolete] - private class StochasticDualCoordinateAscentBinaryClassifierPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public StochasticDualCoordinateAscentBinaryClassifierPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Trainers - { - - /// - /// - [Obsolete] - public sealed partial class StochasticDualCoordinateAscentClassifier : Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Loss Function - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public SDCAClassificationLossFunction LossFunction { get; set; } = new LogLossSDCAClassificationLossFunction(); - - /// - /// L2 regularizer constant. By default the l2 constant is automatically inferred based on data set. - /// - [TlcModule.SweepableDiscreteParamAttribute("L2Const", new object[]{"", 1E-07f, 1E-06f, 1E-05f, 0.0001f, 0.001f, 0.01f})] - [Obsolete] - public float? L2Const { get; set; } - - /// - /// L1 soft threshold (L1/L2). Note that it is easier to control and sweep using the threshold parameter than the raw L1-regularizer constant. By default the l1 threshold is automatically inferred based on data set. - /// - [TlcModule.SweepableDiscreteParamAttribute("L1Threshold", new object[]{"", 0f, 0.25f, 0.5f, 0.75f, 1f})] - [Obsolete] - public float? L1Threshold { get; set; } - - /// - /// Degree of lock-free parallelism. Defaults to automatic. Determinism not guaranteed. - /// - [Obsolete] - public int? 
NumThreads { get; set; } - - /// - /// The tolerance for the ratio between duality gap and primal loss for convergence checking. - /// - [TlcModule.SweepableDiscreteParamAttribute("ConvergenceTolerance", new object[]{0.001f, 0.01f, 0.1f, 0.2f})] - [Obsolete] - public float ConvergenceTolerance { get; set; } = 0.1f; - - /// - /// Maximum number of iterations; set to 1 to simulate online learning. Defaults to automatic. - /// - [TlcModule.SweepableDiscreteParamAttribute("MaxIterations", new object[]{"", 10, 20, 100})] - [Obsolete] - public int? MaxIterations { get; set; } - - /// - /// Shuffle data every epoch? - /// - [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[]{false, true})] - [Obsolete] - public bool Shuffle { get; set; } = true; - - /// - /// Convergence check frequency (in terms of number of iterations). Set as negative or zero for not checking at all. If left blank, it defaults to check after every 'numThreads' iterations. - /// - [Obsolete] - public int? CheckFrequency { get; set; } - - /// - /// The learning rate for adjusting bias from being regularized. - /// - [TlcModule.SweepableDiscreteParamAttribute("BiasLearningRate", new object[]{0f, 0.01f, 0.1f, 1f})] - [Obsolete] - public float BiasLearningRate { get; set; } - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IMulticlassClassificationOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(StochasticDualCoordinateAscentClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new StochasticDualCoordinateAscentClassifierPipelineStep(output); - } - - [Obsolete] - private class StochasticDualCoordinateAscentClassifierPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public StochasticDualCoordinateAscentClassifierPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Trainers - { - - /// - /// - [Obsolete] - public sealed partial class StochasticDualCoordinateAscentRegressor : Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Loss Function - /// - 
[JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public SDCARegressionLossFunction LossFunction { get; set; } = new SquaredLossSDCARegressionLossFunction(); - - /// - /// L2 regularizer constant. By default the l2 constant is automatically inferred based on data set. - /// - [TlcModule.SweepableDiscreteParamAttribute("L2Const", new object[]{"", 1E-07f, 1E-06f, 1E-05f, 0.0001f, 0.001f, 0.01f})] - [Obsolete] - public float? L2Const { get; set; } - - /// - /// L1 soft threshold (L1/L2). Note that it is easier to control and sweep using the threshold parameter than the raw L1-regularizer constant. By default the l1 threshold is automatically inferred based on data set. - /// - [TlcModule.SweepableDiscreteParamAttribute("L1Threshold", new object[]{"", 0f, 0.25f, 0.5f, 0.75f, 1f})] - [Obsolete] - public float? L1Threshold { get; set; } - - /// - /// Degree of lock-free parallelism. Defaults to automatic. Determinism not guaranteed. - /// - [Obsolete] - public int? NumThreads { get; set; } - - /// - /// The tolerance for the ratio between duality gap and primal loss for convergence checking. - /// - [TlcModule.SweepableDiscreteParamAttribute("ConvergenceTolerance", new object[]{0.001f, 0.01f, 0.1f, 0.2f})] - [Obsolete] - public float ConvergenceTolerance { get; set; } = 0.01f; - - /// - /// Maximum number of iterations; set to 1 to simulate online learning. Defaults to automatic. - /// - [TlcModule.SweepableDiscreteParamAttribute("MaxIterations", new object[]{"", 10, 20, 100})] - [Obsolete] - public int? MaxIterations { get; set; } - - /// - /// Shuffle data every epoch? - /// - [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[]{false, true})] - [Obsolete] - public bool Shuffle { get; set; } = true; - - /// - /// Convergence check frequency (in terms of number of iterations). Set as negative or zero for not checking at all. If left blank, it defaults to check after every 'numThreads' iterations. - /// - [Obsolete] - public int? CheckFrequency { get; set; } - - /// - /// The learning rate for adjusting bias from being regularized. 
- /// - [TlcModule.SweepableDiscreteParamAttribute("BiasLearningRate", new object[]{0f, 0.01f, 0.1f, 1f})] - [Obsolete] - public float BiasLearningRate { get; set; } = 1f; - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IRegressionOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(StochasticDualCoordinateAscentRegressor)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new StochasticDualCoordinateAscentRegressorPipelineStep(output); - } - - [Obsolete] - private class StochasticDualCoordinateAscentRegressorPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public StochasticDualCoordinateAscentRegressorPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Trainers - { - - /// - /// Train an Hogwild SGD binary model. - /// - [Obsolete] - public sealed partial class StochasticGradientDescentBinaryClassifier : Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Loss Function - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public ClassificationLossFunction LossFunction { get; set; } = new LogLossClassificationLossFunction(); - - /// - /// L2 Regularization constant - /// - [TlcModule.SweepableDiscreteParamAttribute("L2Const", new object[]{1E-07f, 5E-07f, 1E-06f, 5E-06f, 1E-05f})] - [Obsolete] - public float L2Weight { get; set; } = 1E-06f; - - /// - /// Degree of lock-free parallelism. Defaults to automatic depending on data sparseness. Determinism not guaranteed. - /// - [Obsolete] - public int? NumThreads { get; set; } - - /// - /// Exponential moving averaged improvement tolerance for convergence - /// - [TlcModule.SweepableDiscreteParamAttribute("ConvergenceTolerance", new object[]{0.01f, 0.001f, 0.0001f, 1E-05f})] - [Obsolete] - public double ConvergenceTolerance { get; set; } = 0.0001d; - - /// - /// Maximum number of iterations; set to 1 to simulate online learning. 
- /// - [TlcModule.SweepableDiscreteParamAttribute("MaxIterations", new object[]{1, 5, 10, 20})] - [Obsolete] - public int MaxIterations { get; set; } = 20; - - /// - /// Initial learning rate (only used by SGD) - /// - [Obsolete] - public double InitLearningRate { get; set; } = 0.01d; - - /// - /// Shuffle data every epoch? - /// - [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[]{false, true})] - [Obsolete] - public bool Shuffle { get; set; } = true; - - /// - /// Apply weight to the positive class, for imbalanced data - /// - [Obsolete] - public float PositiveInstanceWeight { get; set; } = 1f; - - /// - /// Convergence check frequency (in terms of number of iterations). Default equals number of threads - /// - [Obsolete] - public int? CheckFrequency { get; set; } - - /// - /// The calibrator kind to apply to the predictor. Specify null for no calibration - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public CalibratorTrainer Calibrator { get; set; } = new PlattCalibratorCalibratorTrainer(); - - /// - /// The maximum number of examples to use when training the calibrator - /// - [Obsolete] - public int MaxCalibrationExamples { get; set; } = 1000000; - - /// - /// Column to use for example weight - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional WeightColumn { get; set; } - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IBinaryClassificationOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(StochasticGradientDescentBinaryClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new StochasticGradientDescentBinaryClassifierPipelineStep(output); - } - - [Obsolete] - private class StochasticGradientDescentBinaryClassifierPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public StochasticGradientDescentBinaryClassifierPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Trainers - { - - /// - [Obsolete] - public sealed partial class SymSgdBinaryClassifier : Microsoft.ML.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.Legacy.ILearningPipelineItem - { 
- - - /// - /// Degree of lock-free parallelism. Determinism not guaranteed. Multi-threading is not supported currently. - /// - [Obsolete] - public int? NumberOfThreads { get; set; } - - /// - /// Number of passes over the data. - /// - [TlcModule.SweepableDiscreteParamAttribute("NumberOfIterations", new object[]{1, 5, 10, 20, 30, 40, 50})] - [Obsolete] - public int NumberOfIterations { get; set; } = 50; - - /// - /// Tolerance for difference in average loss in consecutive passes. - /// - [Obsolete] - public float Tolerance { get; set; } = 0.0001f; - - /// - /// Learning rate - /// - [TlcModule.SweepableDiscreteParamAttribute("LearningRate", new object[]{"", 10f, 1f, 0.1f, 0.01f, 0.001f})] - [Obsolete] - public float? LearningRate { get; set; } - - /// - /// L2 regularization - /// - [TlcModule.SweepableDiscreteParamAttribute("L2Regularization", new object[]{0f, 1E-05f, 1E-05f, 1E-06f, 1E-07f})] - [Obsolete] - public float L2Regularization { get; set; } - - /// - /// The number of iterations each thread learns a local model until combining it with the global model. Low value means more updated global model and high value means less cache traffic. - /// - [TlcModule.SweepableDiscreteParamAttribute("UpdateFrequency", new object[]{"", 5, 20})] - [Obsolete] - public int? UpdateFrequency { get; set; } - - /// - /// The acceleration memory budget in MB - /// - [Obsolete] - public long MemorySize { get; set; } = 1024; - - /// - /// Shuffle data? - /// - [Obsolete] - public bool Shuffle { get; set; } = true; - - /// - /// Apply weight to the positive class, for imbalanced data - /// - [Obsolete] - public float PositiveInstanceWeight { get; set; } = 1f; - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.IBinaryClassificationOutput, Microsoft.ML.EntryPoints.CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => TrainingData; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(SymSgdBinaryClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - TrainingData = dataStep.Data; - } - Output output = experiment.Add(this); - return new SymSgdBinaryClassifierPipelineStep(output); - } - - [Obsolete] - private class SymSgdBinaryClassifierPipelineStep : ILearningPipelinePredictorStep - { - [Obsolete] - public SymSgdBinaryClassifierPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - [Obsolete] - public Var Model { get; } - } - } - } - - namespace 
Legacy.Transforms - { - - /// - /// Approximate bootstrap sampling. - /// - [Obsolete] - public sealed partial class ApproximateBootstrapSampler : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Whether this is the out-of-bag sample, that is, all those rows that are not selected by the transform. - /// - [Obsolete] - public bool Complement { get; set; } = false; - - /// - /// The random seed. If unspecified random state will be instead derived from the environment. - /// - [Obsolete] - public uint? Seed { get; set; } - - /// - /// Whether we should attempt to shuffle the source data. By default on, but can be turned off for efficiency. - /// - [Obsolete] - public bool ShuffleInput { get; set; } = true; - - /// - /// When shuffling the output, the number of output rows to keep in that pool. Note that shuffling of output is completely distinct from shuffling of input. - /// - [Obsolete] - public int PoolSize { get; set; } = 1000; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(ApproximateBootstrapSampler)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new ApproximateBootstrapSamplerPipelineStep(output); - } - - [Obsolete] - private class ApproximateBootstrapSamplerPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public ApproximateBootstrapSamplerPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - /// - /// For binary prediction, it renames the PredictedLabel and Score columns to include the name of the positive class. 
- /// - [Obsolete] - public sealed partial class BinaryPredictionScoreColumnsRenamer : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// The predictor model used in scoring - /// - [Obsolete] - public Var PredictorModel { get; set; } = new Var(); - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(BinaryPredictionScoreColumnsRenamer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new BinaryPredictionScoreColumnsRenamerPipelineStep(output); - } - - [Obsolete] - private class BinaryPredictionScoreColumnsRenamerPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public BinaryPredictionScoreColumnsRenamerPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - [Obsolete] - public sealed partial class NormalizeTransformBinColumn : OneToOneColumn, IOneToOneColumn - { - /// - /// Max number of bins, power of 2 recommended - /// - [Obsolete] - public int? NumBins { get; set; } - - /// - /// Whether to map zero to zero, preserving sparsity - /// - [Obsolete] - public bool? FixZero { get; set; } - - /// - /// Max number of examples used to train the normalizer - /// - [Obsolete] - public long? MaxTrainingExamples { get; set; } - - /// - /// Name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Name of the source column - /// - [Obsolete] - public string Source { get; set; } - - } - - /// - /// The values are assigned into equidensity bins and a value is mapped to its bin_number/number_of_bins. - /// - [Obsolete] - public sealed partial class BinNormalizer : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public BinNormalizer() - { - } - - public BinNormalizer(params string[] inputColumns) - { - if (inputColumns != null) - { - foreach (string input in inputColumns) - { - AddColumn(input); - } - } - } - - public BinNormalizer(params (string inputColumn, string outputColumn)[] inputOutputColumns) - { - if (inputOutputColumns != null) - { - foreach (var inputOutput in inputOutputColumns) - { - AddColumn(inputOutput.outputColumn, inputOutput.inputColumn); - } - } - } - - public void AddColumn(string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(inputColumn)); - Column = list.ToArray(); - } - - public void AddColumn(string outputColumn, string inputColumn) - { - var list = Column == null ? 
new List() : new List(Column); - list.Add(OneToOneColumn.Create(outputColumn, inputColumn)); - Column = list.ToArray(); - } - - - /// - /// New column definition(s) (optional form: name:src) - /// - [Obsolete] - public NormalizeTransformBinColumn[] Column { get; set; } - - /// - /// Max number of bins, power of 2 recommended - /// - [Obsolete] - public int NumBins { get; set; } = 1024; - - /// - /// Whether to map zero to zero, preserving sparsity - /// - [Obsolete] - public bool FixZero { get; set; } = true; - - /// - /// Max number of examples used to train the normalizer - /// - [Obsolete] - public long MaxTrainingExamples { get; set; } = 1000000000; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(BinNormalizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new BinNormalizerPipelineStep(output); - } - - [Obsolete] - private class BinNormalizerPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public BinNormalizerPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - [Obsolete] - public enum OneHotEncodingTransformerOutputKind : byte - { - Bag = 1, - Ind = 2, - Key = 3, - Bin = 4 - } - - - [Obsolete] - public sealed partial class OneHotHashEncodingColumn : OneToOneColumn, IOneToOneColumn - { - /// - /// The number of bits to hash into. Must be between 1 and 30, inclusive. - /// - [Obsolete] - public int? HashBits { get; set; } - - /// - /// Hashing seed - /// - [Obsolete] - public uint? Seed { get; set; } - - /// - /// Whether the position of each term should be included in the hash - /// - [Obsolete] - public bool? Ordered { get; set; } - - /// - /// Limit the number of keys used to generate the slot name to this many. 0 means no invert hashing, -1 means no limit. - /// - [Obsolete] - public int? InvertHash { get; set; } - - /// - /// Output kind: Bag (multi-set vector), Ind (indicator vector), or Key (index) - /// - [Obsolete] - public OneHotEncodingTransformerOutputKind? 
OutputKind { get; set; } - - /// - /// Name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Name of the source column - /// - [Obsolete] - public string Source { get; set; } - - } - - /// - /// - [Obsolete] - public sealed partial class CategoricalHashOneHotVectorizer : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public CategoricalHashOneHotVectorizer() - { - } - - public CategoricalHashOneHotVectorizer(params string[] inputColumns) - { - if (inputColumns != null) - { - foreach (string input in inputColumns) - { - AddColumn(input); - } - } - } - - public CategoricalHashOneHotVectorizer(params (string inputColumn, string outputColumn)[] inputOutputColumns) - { - if (inputOutputColumns != null) - { - foreach (var inputOutput in inputOutputColumns) - { - AddColumn(inputOutput.outputColumn, inputOutput.inputColumn); - } - } - } - - public void AddColumn(string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(inputColumn)); - Column = list.ToArray(); - } - - public void AddColumn(string outputColumn, string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(outputColumn, inputColumn)); - Column = list.ToArray(); - } - - - /// - /// New column definition(s) (optional form: name:hashBits:src) - /// - [Obsolete] - public OneHotHashEncodingColumn[] Column { get; set; } - - /// - /// Number of bits to hash into. Must be between 1 and 30, inclusive. - /// - [Obsolete] - public int HashBits { get; set; } = 16; - - /// - /// Hashing seed - /// - [Obsolete] - public uint Seed { get; set; } = 314489979; - - /// - /// Whether the position of each term should be included in the hash - /// - [Obsolete] - public bool Ordered { get; set; } = true; - - /// - /// Limit the number of keys used to generate the slot name to this many. 0 means no invert hashing, -1 means no limit. 
- /// - [Obsolete] - public int InvertHash { get; set; } - - /// - /// Output kind: Bag (multi-set vector), Ind (indicator vector), or Key (index) - /// - [Obsolete] - public OneHotEncodingTransformerOutputKind OutputKind { get; set; } = OneHotEncodingTransformerOutputKind.Bag; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(CategoricalHashOneHotVectorizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new CategoricalHashOneHotVectorizerPipelineStep(output); - } - - [Obsolete] - private class CategoricalHashOneHotVectorizerPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public CategoricalHashOneHotVectorizerPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - [Obsolete] - public enum ValueToKeyMappingTransformerSortOrder : byte - { - Occurrence = 0, - Value = 1 - } - - - [Obsolete] - public sealed partial class OneHotEncodingTransformerColumn : OneToOneColumn, IOneToOneColumn - { - /// - /// Output kind: Bag (multi-set vector), Ind (indicator vector), Key (index), or Binary encoded indicator vector - /// - [Obsolete] - public OneHotEncodingTransformerOutputKind? OutputKind { get; set; } - - /// - /// Maximum number of terms to keep when auto-training - /// - [Obsolete] - public int? MaxNumTerms { get; set; } - - /// - /// List of terms - /// - [Obsolete] - public string[] Term { get; set; } - - /// - /// How items should be ordered when vectorized. By default, they will be in the order encountered. If by value items are sorted according to their default comparison, for example, text sorting will be case sensitive (for example, 'A' then 'Z' then 'a'). - /// - [Obsolete] - public ValueToKeyMappingTransformerSortOrder? Sort { get; set; } - - /// - /// Whether key value metadata should be text, regardless of the actual input type - /// - [Obsolete] - public bool? 
TextKeyValues { get; set; } - - /// - /// Name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Name of the source column - /// - [Obsolete] - public string Source { get; set; } - - } - - /// - /// - [Obsolete] - public sealed partial class CategoricalOneHotVectorizer : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public CategoricalOneHotVectorizer() - { - } - - public CategoricalOneHotVectorizer(params string[] inputColumns) - { - if (inputColumns != null) - { - foreach (string input in inputColumns) - { - AddColumn(input); - } - } - } - - public CategoricalOneHotVectorizer(params (string inputColumn, string outputColumn)[] inputOutputColumns) - { - if (inputOutputColumns != null) - { - foreach (var inputOutput in inputOutputColumns) - { - AddColumn(inputOutput.outputColumn, inputOutput.inputColumn); - } - } - } - - public void AddColumn(string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(inputColumn)); - Column = list.ToArray(); - } - - public void AddColumn(string outputColumn, string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(outputColumn, inputColumn)); - Column = list.ToArray(); - } - - - /// - /// New column definition(s) (optional form: name:src) - /// - [Obsolete] - public OneHotEncodingTransformerColumn[] Column { get; set; } - - /// - /// Output kind: Bag (multi-set vector), Ind (indicator vector), or Key (index) - /// - [Obsolete] - public OneHotEncodingTransformerOutputKind OutputKind { get; set; } = OneHotEncodingTransformerOutputKind.Ind; - - /// - /// Maximum number of terms to keep per column when auto-training - /// - [Obsolete] - public int MaxNumTerms { get; set; } = 1000000; - - /// - /// List of terms - /// - [Obsolete] - public string[] Term { get; set; } - - /// - /// How items should be ordered when vectorized. By default, they will be in the order encountered. If by value items are sorted according to their default comparison, for example, text sorting will be case sensitive (for example, 'A' then 'Z' then 'a'). 
- /// - [Obsolete] - public ValueToKeyMappingTransformerSortOrder Sort { get; set; } = ValueToKeyMappingTransformerSortOrder.Occurrence; - - /// - /// Whether key value metadata should be text, regardless of the actual input type - /// - [Obsolete] - public bool TextKeyValues { get; set; } = true; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(CategoricalOneHotVectorizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new CategoricalOneHotVectorizerPipelineStep(output); - } - - [Obsolete] - private class CategoricalOneHotVectorizerPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public CategoricalOneHotVectorizerPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - [Obsolete] - public sealed partial class TokenizingByCharactersTransformerColumn : OneToOneColumn, IOneToOneColumn - { - /// - /// Name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Name of the source column - /// - [Obsolete] - public string Source { get; set; } - - } - - /// - [Obsolete] - public sealed partial class CharacterTokenizer : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public CharacterTokenizer() - { - } - - public CharacterTokenizer(params string[] inputColumns) - { - if (inputColumns != null) - { - foreach (string input in inputColumns) - { - AddColumn(input); - } - } - } - - public CharacterTokenizer(params (string inputColumn, string outputColumn)[] inputOutputColumns) - { - if (inputOutputColumns != null) - { - foreach (var inputOutput in inputOutputColumns) - { - AddColumn(inputOutput.outputColumn, inputOutput.inputColumn); - } - } - } - - public void AddColumn(string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(inputColumn)); - Column = list.ToArray(); - } - - public void AddColumn(string outputColumn, string inputColumn) - { - var list = Column == null ? 
new List() : new List(Column); - list.Add(OneToOneColumn.Create(outputColumn, inputColumn)); - Column = list.ToArray(); - } - - - /// - /// New column definition(s) (optional form: name:src) - /// - [Obsolete] - public TokenizingByCharactersTransformerColumn[] Column { get; set; } - - /// - /// Whether to mark the beginning/end of each row/slot with start of text character (0x02)/end of text character (0x03) - /// - [Obsolete] - public bool UseMarkerChars { get; set; } = true; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(CharacterTokenizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new CharacterTokenizerPipelineStep(output); - } - - [Obsolete] - private class CharacterTokenizerPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public CharacterTokenizerPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - [Obsolete] - public sealed partial class ColumnConcatenatingTransformerColumn : ManyToOneColumn, IManyToOneColumn - { - /// - /// Name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Name of the source column - /// - [Obsolete] - public string[] Source { get; set; } - - } - - /// - /// Concatenates one or more columns of the same item type. - /// - [Obsolete] - public sealed partial class ColumnConcatenator : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public ColumnConcatenator() - { - } - - public ColumnConcatenator(string outputColumn, params string[] inputColumns) - { - AddColumn(outputColumn, inputColumns); - } - - public void AddColumn(string name, params string[] source) - { - var list = Column == null ? 
new List() : new List(Column); - list.Add(ManyToOneColumn.Create(name, source)); - Column = list.ToArray(); - } - - - /// - /// New column definition(s) (optional form: name:srcs) - /// - [Obsolete] - public ColumnConcatenatingTransformerColumn[] Column { get; set; } - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(ColumnConcatenator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new ColumnConcatenatorPipelineStep(output); - } - - [Obsolete] - private class ColumnConcatenatorPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public ColumnConcatenatorPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - [Obsolete] - public sealed partial class ColumnCopyingTransformerColumn : OneToOneColumn, IOneToOneColumn - { - /// - /// Name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Name of the source column - /// - [Obsolete] - public string Source { get; set; } - - } - - /// - /// Duplicates columns from the dataset - /// - [Obsolete] - public sealed partial class ColumnCopier : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public ColumnCopier() - { - } - - public ColumnCopier(params string[] inputColumns) - { - if (inputColumns != null) - { - foreach (string input in inputColumns) - { - AddColumn(input); - } - } - } - - public ColumnCopier(params (string inputColumn, string outputColumn)[] inputOutputColumns) - { - if (inputOutputColumns != null) - { - foreach (var inputOutput in inputOutputColumns) - { - AddColumn(inputOutput.outputColumn, inputOutput.inputColumn); - } - } - } - - public void AddColumn(string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(inputColumn)); - Column = list.ToArray(); - } - - public void AddColumn(string outputColumn, string inputColumn) - { - var list = Column == null ? 
new List() : new List(Column); - list.Add(OneToOneColumn.Create(outputColumn, inputColumn)); - Column = list.ToArray(); - } - - - /// - /// New column definition(s) (optional form: name:src) - /// - [Obsolete] - public ColumnCopyingTransformerColumn[] Column { get; set; } - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(ColumnCopier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new ColumnCopierPipelineStep(output); - } - - [Obsolete] - private class ColumnCopierPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public ColumnCopierPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - /// - /// Selects a set of columns, dropping all others - /// - [Obsolete] - public sealed partial class ColumnSelector : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// List of columns to keep. - /// - [Obsolete] - public string[] KeepColumns { get; set; } - - /// - /// List of columns to drop. - /// - [Obsolete] - public string[] DropColumns { get; set; } - - /// - /// Specifies whether to keep or remove hidden columns. - /// - [Obsolete] - public bool KeepHidden { get; set; } = false; - - /// - /// Specifies whether to ignore columns that are missing from the input. 
- /// - [Obsolete] - public bool IgnoreMissing { get; set; } = false; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(ColumnSelector)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new ColumnSelectorPipelineStep(output); - } - - [Obsolete] - private class ColumnSelectorPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public ColumnSelectorPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - [Obsolete] - public sealed partial class TypeConvertingTransformerColumn : OneToOneColumn, IOneToOneColumn - { - /// - /// The result type - /// - [Obsolete] - public Microsoft.ML.Legacy.Data.DataKind? ResultType { get; set; } - - /// - /// For a key column, this defines the range of values - /// - [Obsolete] - public string Range { get; set; } - - /// - /// Name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Name of the source column - /// - [Obsolete] - public string Source { get; set; } - - } - - /// - /// Converts a column to a different type, using standard conversions. - /// - [Obsolete] - public sealed partial class ColumnTypeConverter : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public ColumnTypeConverter() - { - } - - public ColumnTypeConverter(params string[] inputColumns) - { - if (inputColumns != null) - { - foreach (string input in inputColumns) - { - AddColumn(input); - } - } - } - - public ColumnTypeConverter(params (string inputColumn, string outputColumn)[] inputOutputColumns) - { - if (inputOutputColumns != null) - { - foreach (var inputOutput in inputOutputColumns) - { - AddColumn(inputOutput.outputColumn, inputOutput.inputColumn); - } - } - } - - public void AddColumn(string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(inputColumn)); - Column = list.ToArray(); - } - - public void AddColumn(string outputColumn, string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(outputColumn, inputColumn)); - Column = list.ToArray(); - } - - - /// - /// New column definition(s) (optional form: name:type:src) - /// - [Obsolete] - public TypeConvertingTransformerColumn[] Column { get; set; } - - /// - /// The result type - /// - [Obsolete] - public Microsoft.ML.Legacy.Data.DataKind? 
ResultType { get; set; } - - /// - /// For a key column, this defines the range of values - /// - [Obsolete] - public string Range { get; set; } - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(ColumnTypeConverter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new ColumnTypeConverterPipelineStep(output); - } - - [Obsolete] - private class ColumnTypeConverterPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public ColumnTypeConverterPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - /// - [Obsolete] - public sealed partial class CombinerByContiguousGroupId : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Columns to group by - /// - [Obsolete] - public string[] GroupKey { get; set; } - - /// - /// Columns to group together - /// - [Obsolete] - public string[] Column { get; set; } - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(CombinerByContiguousGroupId)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new CombinerByContiguousGroupIdPipelineStep(output); - } - - [Obsolete] - private class CombinerByContiguousGroupIdPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public CombinerByContiguousGroupIdPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - [Obsolete] - public sealed partial class NormalizeTransformAffineColumn : OneToOneColumn, IOneToOneColumn - { - /// - /// Whether to map zero to zero, preserving sparsity - /// - [Obsolete] - public bool? FixZero { get; set; } - - /// - /// Max number of examples used to train the normalizer - /// - [Obsolete] - public long? 
MaxTrainingExamples { get; set; } - - /// - /// Name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Name of the source column - /// - [Obsolete] - public string Source { get; set; } - - } - - /// - /// Normalize the columns only if needed - /// - [Obsolete] - public sealed partial class ConditionalNormalizer : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public ConditionalNormalizer() - { - } - - public ConditionalNormalizer(params string[] inputColumns) - { - if (inputColumns != null) - { - foreach (string input in inputColumns) - { - AddColumn(input); - } - } - } - - public ConditionalNormalizer(params (string inputColumn, string outputColumn)[] inputOutputColumns) - { - if (inputOutputColumns != null) - { - foreach (var inputOutput in inputOutputColumns) - { - AddColumn(inputOutput.outputColumn, inputOutput.inputColumn); - } - } - } - - public void AddColumn(string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(inputColumn)); - Column = list.ToArray(); - } - - public void AddColumn(string outputColumn, string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(outputColumn, inputColumn)); - Column = list.ToArray(); - } - - - /// - /// New column definition(s) (optional form: name:src) - /// - [Obsolete] - public NormalizeTransformAffineColumn[] Column { get; set; } - - /// - /// Whether to map zero to zero, preserving sparsity - /// - [Obsolete] - public bool FixZero { get; set; } = true; - - /// - /// Max number of examples used to train the normalizer - /// - [Obsolete] - public long MaxTrainingExamples { get; set; } = 1000000000; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(ConditionalNormalizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new ConditionalNormalizerPipelineStep(output); - } - - [Obsolete] - private class ConditionalNormalizerPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public ConditionalNormalizerPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - [Obsolete] - public enum CacheCachingType - { - Memory = 0, - Disk = 1 - } - - - /// - /// Caches using the specified cache option. 
- /// - [Obsolete] - public sealed partial class DataCache : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Caching strategy - /// - [Obsolete] - public CacheCachingType Caching { get; set; } = CacheCachingType.Memory; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output - { - /// - /// Dataset - /// - public Var OutputData { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(DataCache)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new DataCachePipelineStep(output); - } - - [Obsolete] - private class DataCachePipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public DataCachePipelineStep(Output output) - { - Data = output.OutputData; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - /// - /// Score a dataset with a predictor model - /// - [Obsolete] - public sealed partial class DatasetScorer - { - - - /// - /// The dataset to be scored - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - /// - /// The predictor model to apply to data - /// - [Obsolete] - public Var PredictorModel { get; set; } = new Var(); - - /// - /// Suffix to append to the score columns - /// - [Obsolete] - public string Suffix { get; set; } - - - [Obsolete] - public sealed class Output - { - /// - /// The scored dataset - /// - public Var ScoredData { get; set; } = new Var(); - - /// - /// The scoring transform - /// - public Var ScoringTransform { get; set; } = new Var(); - - } - } - } - - namespace Legacy.Transforms - { - - /// - /// Score a dataset with a transform model - /// - [Obsolete] - public sealed partial class DatasetTransformScorer - { - - - /// - /// The dataset to be scored - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - /// - /// The transform model to apply to data - /// - [Obsolete] - public Var TransformModel { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output - { - /// - /// The scored dataset - /// - public Var ScoredData { get; set; } = new Var(); - - /// - /// The scoring transform - /// - public Var ScoringTransform { get; set; } = new Var(); - - } - } - } - - namespace Legacy.Transforms - { - - [Obsolete] - public sealed partial class ValueToKeyMappingTransformerColumn : OneToOneColumn, IOneToOneColumn - { - /// - /// Maximum number of terms to keep when auto-training - /// - [Obsolete] - public int? MaxNumTerms { get; set; } - - /// - /// List of terms - /// - [Obsolete] - public string[] Term { get; set; } - - /// - /// How items should be ordered when vectorized. By default, they will be in the order encountered. If by value items are sorted according to their default comparison, for example, text sorting will be case sensitive (for example, 'A' then 'Z' then 'a'). - /// - [Obsolete] - public ValueToKeyMappingTransformerSortOrder? Sort { get; set; } - - /// - /// Whether key value metadata should be text, regardless of the actual input type - /// - [Obsolete] - public bool? 
TextKeyValues { get; set; } - - /// - /// Name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Name of the source column - /// - [Obsolete] - public string Source { get; set; } - - } - - /// - /// Converts input values (words, numbers, etc.) to index in a dictionary. - /// - [Obsolete] - public sealed partial class Dictionarizer : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public Dictionarizer() - { - } - - public Dictionarizer(params string[] inputColumns) - { - if (inputColumns != null) - { - foreach (string input in inputColumns) - { - AddColumn(input); - } - } - } - - public Dictionarizer(params (string inputColumn, string outputColumn)[] inputOutputColumns) - { - if (inputOutputColumns != null) - { - foreach (var inputOutput in inputOutputColumns) - { - AddColumn(inputOutput.outputColumn, inputOutput.inputColumn); - } - } - } - - public void AddColumn(string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(inputColumn)); - Column = list.ToArray(); - } - - public void AddColumn(string outputColumn, string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(outputColumn, inputColumn)); - Column = list.ToArray(); - } - - - /// - /// New column definition(s) (optional form: name:src) - /// - [Obsolete] - public ValueToKeyMappingTransformerColumn[] Column { get; set; } - - /// - /// Maximum number of terms to keep per column when auto-training - /// - [Obsolete] - public int MaxNumTerms { get; set; } = 1000000; - - /// - /// List of terms - /// - [Obsolete] - public string[] Term { get; set; } - - /// - /// How items should be ordered when vectorized. By default, they will be in the order encountered. If by value items are sorted according to their default comparison, for example, text sorting will be case sensitive (for example, 'A' then 'Z' then 'a'). 
- /// - [Obsolete] - public ValueToKeyMappingTransformerSortOrder Sort { get; set; } = ValueToKeyMappingTransformerSortOrder.Occurrence; - - /// - /// Whether key value metadata should be text, regardless of the actual input type - /// - [Obsolete] - public bool TextKeyValues { get; set; } = false; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(Dictionarizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new DictionarizerPipelineStep(output); - } - - [Obsolete] - private class DictionarizerPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public DictionarizerPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - /// - /// Combines all the features into one feature column. - /// - [Obsolete] - public sealed partial class FeatureCombiner : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Features - /// - [Obsolete] - public string[] Features { get; set; } - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(FeatureCombiner)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new FeatureCombinerPipelineStep(output); - } - - [Obsolete] - private class FeatureCombinerPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public FeatureCombinerPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - /// - /// For each data point, calculates the contribution of individual features to the model prediction. 
- /// - [Obsolete] - public sealed partial class FeatureContributionCalculationTransformer : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// The predictor model to apply to data - /// - [Obsolete] - public Var PredictorModel { get; set; } = new Var(); - - /// - /// Name of feature column - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Number of top contributions - /// - [Obsolete] - public int Top { get; set; } = 10; - - /// - /// Number of bottom contributions - /// - [Obsolete] - public int Bottom { get; set; } = 10; - - /// - /// Whether or not output of Features contribution should be normalized - /// - [Obsolete] - public bool Normalize { get; set; } = true; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(FeatureContributionCalculationTransformer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new FeatureContributionCalculationTransformerPipelineStep(output); - } - - [Obsolete] - private class FeatureContributionCalculationTransformerPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public FeatureContributionCalculationTransformerPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - /// - /// - [Obsolete] - public sealed partial class FeatureSelectorByCount : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Columns to use for feature selection - /// - [Obsolete] - public string[] Column { get; set; } - - /// - /// If the count of non-default values for a slot is greater than or equal to this threshold, the slot is preserved - /// - [Obsolete] - public long Count { get; set; } = 1; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(FeatureSelectorByCount)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new FeatureSelectorByCountPipelineStep(output); - } - - 
[Obsolete] - private class FeatureSelectorByCountPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public FeatureSelectorByCountPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - /// - /// - [Obsolete] - public sealed partial class FeatureSelectorByMutualInformation : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Columns to use for feature selection - /// - [Obsolete] - public string[] Column { get; set; } - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The maximum number of slots to preserve in output - /// - [Obsolete] - public int SlotsInOutput { get; set; } = 1000; - - /// - /// Max number of bins for R4/R8 columns, power of 2 recommended - /// - [Obsolete] - public int NumBins { get; set; } = 256; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(FeatureSelectorByMutualInformation)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new FeatureSelectorByMutualInformationPipelineStep(output); - } - - [Obsolete] - private class FeatureSelectorByMutualInformationPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public FeatureSelectorByMutualInformationPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - [Obsolete] - public sealed partial class LpNormalizingTransformerGcnColumn : OneToOneColumn, IOneToOneColumn - { - /// - /// Normalize by standard deviation rather than L2 norm - /// - [Obsolete] - public bool? UseStdDev { get; set; } - - /// - /// Scale features by this value - /// - [Obsolete] - public float? Scale { get; set; } - - /// - /// Subtract mean from each value before normalizing - /// - [Obsolete] - public bool? 
SubMean { get; set; } - - /// - /// Name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Name of the source column - /// - [Obsolete] - public string Source { get; set; } - - } - - /// - [Obsolete] - public sealed partial class GlobalContrastNormalizer : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public GlobalContrastNormalizer() - { - } - - public GlobalContrastNormalizer(params string[] inputColumns) - { - if (inputColumns != null) - { - foreach (string input in inputColumns) - { - AddColumn(input); - } - } - } - - public GlobalContrastNormalizer(params (string inputColumn, string outputColumn)[] inputOutputColumns) - { - if (inputOutputColumns != null) - { - foreach (var inputOutput in inputOutputColumns) - { - AddColumn(inputOutput.outputColumn, inputOutput.inputColumn); - } - } - } - - public void AddColumn(string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(inputColumn)); - Column = list.ToArray(); - } - - public void AddColumn(string outputColumn, string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(outputColumn, inputColumn)); - Column = list.ToArray(); - } - - - /// - /// New column definition(s) (optional form: name:src) - /// - [Obsolete] - public LpNormalizingTransformerGcnColumn[] Column { get; set; } - - /// - /// Subtract mean from each value before normalizing - /// - [Obsolete] - public bool SubMean { get; set; } = true; - - /// - /// Normalize by standard deviation rather than L2 norm - /// - [Obsolete] - public bool UseStdDev { get; set; } = false; - - /// - /// Scale features by this value - /// - [Obsolete] - public float Scale { get; set; } = 1f; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(GlobalContrastNormalizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new GlobalContrastNormalizerPipelineStep(output); - } - - [Obsolete] - private class GlobalContrastNormalizerPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public GlobalContrastNormalizerPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - [Obsolete] - public sealed partial class HashJoiningTransformColumn : OneToOneColumn, IOneToOneColumn - { - /// - /// Whether the values need to be combined for a single hash - /// - [Obsolete] - public bool? Join { get; set; } - - /// - /// Which slots should be combined together. Example: 0,3,5;0,1;3;2,1,0. Overrides 'join'. 
- /// - [Obsolete] - public string CustomSlotMap { get; set; } - - /// - /// Number of bits to hash into. Must be between 1 and 31, inclusive. - /// - [Obsolete] - public int? HashBits { get; set; } - - /// - /// Hashing seed - /// - [Obsolete] - public uint? Seed { get; set; } - - /// - /// Whether the position of each term should be included in the hash - /// - [Obsolete] - public bool? Ordered { get; set; } - - /// - /// Name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Name of the source column - /// - [Obsolete] - public string Source { get; set; } - - } - - /// - /// - [Obsolete] - public sealed partial class HashConverter : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public HashConverter() - { - } - - public HashConverter(params string[] inputColumns) - { - if (inputColumns != null) - { - foreach (string input in inputColumns) - { - AddColumn(input); - } - } - } - - public HashConverter(params (string inputColumn, string outputColumn)[] inputOutputColumns) - { - if (inputOutputColumns != null) - { - foreach (var inputOutput in inputOutputColumns) - { - AddColumn(inputOutput.outputColumn, inputOutput.inputColumn); - } - } - } - - public void AddColumn(string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(inputColumn)); - Column = list.ToArray(); - } - - public void AddColumn(string outputColumn, string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(outputColumn, inputColumn)); - Column = list.ToArray(); - } - - - /// - /// New column definition(s) (optional form: name:src) - /// - [Obsolete] - public HashJoiningTransformColumn[] Column { get; set; } - - /// - /// Whether the values need to be combined for a single hash - /// - [Obsolete] - public bool Join { get; set; } = true; - - /// - /// Number of bits to hash into. Must be between 1 and 31, inclusive. 
- /// - [Obsolete] - public int HashBits { get; set; } = 31; - - /// - /// Hashing seed - /// - [Obsolete] - public uint Seed { get; set; } = 314489979; - - /// - /// Whether the position of each term should be included in the hash - /// - [Obsolete] - public bool Ordered { get; set; } = true; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(HashConverter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new HashConverterPipelineStep(output); - } - - [Obsolete] - private class HashConverterPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public HashConverterPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - [Obsolete] - public sealed partial class ImageGrayscaleTransformColumn : OneToOneColumn, IOneToOneColumn - { - /// - /// Name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Name of the source column - /// - [Obsolete] - public string Source { get; set; } - - } - - /// - /// Convert image into grayscale. - /// - [Obsolete] - public sealed partial class ImageGrayscale : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public ImageGrayscale() - { - } - - public ImageGrayscale(params string[] inputColumns) - { - if (inputColumns != null) - { - foreach (string input in inputColumns) - { - AddColumn(input); - } - } - } - - public ImageGrayscale(params (string inputColumn, string outputColumn)[] inputOutputColumns) - { - if (inputOutputColumns != null) - { - foreach (var inputOutput in inputOutputColumns) - { - AddColumn(inputOutput.outputColumn, inputOutput.inputColumn); - } - } - } - - public void AddColumn(string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(inputColumn)); - Column = list.ToArray(); - } - - public void AddColumn(string outputColumn, string inputColumn) - { - var list = Column == null ? 
new List() : new List(Column); - list.Add(OneToOneColumn.Create(outputColumn, inputColumn)); - Column = list.ToArray(); - } - - - /// - /// New column definition(s) (optional form: name:src) - /// - [Obsolete] - public ImageGrayscaleTransformColumn[] Column { get; set; } - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(ImageGrayscale)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new ImageGrayscalePipelineStep(output); - } - - [Obsolete] - private class ImageGrayscalePipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public ImageGrayscalePipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - [Obsolete] - public sealed partial class ImageLoaderTransformColumn : OneToOneColumn, IOneToOneColumn - { - /// - /// Name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Name of the source column - /// - [Obsolete] - public string Source { get; set; } - - } - - /// - /// Load images from files. - /// - [Obsolete] - public sealed partial class ImageLoader : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public ImageLoader() - { - } - - public ImageLoader(params string[] inputColumns) - { - if (inputColumns != null) - { - foreach (string input in inputColumns) - { - AddColumn(input); - } - } - } - - public ImageLoader(params (string inputColumn, string outputColumn)[] inputOutputColumns) - { - if (inputOutputColumns != null) - { - foreach (var inputOutput in inputOutputColumns) - { - AddColumn(inputOutput.outputColumn, inputOutput.inputColumn); - } - } - } - - public void AddColumn(string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(inputColumn)); - Column = list.ToArray(); - } - - public void AddColumn(string outputColumn, string inputColumn) - { - var list = Column == null ? 
new List() : new List(Column); - list.Add(OneToOneColumn.Create(outputColumn, inputColumn)); - Column = list.ToArray(); - } - - - /// - /// New column definition(s) (optional form: name:src) - /// - [Obsolete] - public ImageLoaderTransformColumn[] Column { get; set; } - - /// - /// Folder where to search for images - /// - [Obsolete] - public string ImageFolder { get; set; } - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(ImageLoader)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new ImageLoaderPipelineStep(output); - } - - [Obsolete] - private class ImageLoaderPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public ImageLoaderPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - [Obsolete] - public sealed partial class ImagePixelExtractorTransformColumn : OneToOneColumn, IOneToOneColumn - { - /// - /// Whether to use alpha channel - /// - [Obsolete] - public bool? UseAlpha { get; set; } - - /// - /// Whether to use red channel - /// - [Obsolete] - public bool? UseRed { get; set; } - - /// - /// Whether to use green channel - /// - [Obsolete] - public bool? UseGreen { get; set; } - - /// - /// Whether to use blue channel - /// - [Obsolete] - public bool? UseBlue { get; set; } - - /// - /// Whether to separate each channel or interleave in ARGB order - /// - [Obsolete] - public bool? InterleaveArgb { get; set; } - - /// - /// Whether to convert to floating point - /// - [Obsolete] - public bool? Convert { get; set; } - - /// - /// Offset (pre-scale) - /// - [Obsolete] - public float? Offset { get; set; } - - /// - /// Scale factor - /// - [Obsolete] - public float? Scale { get; set; } - - /// - /// Name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Name of the source column - /// - [Obsolete] - public string Source { get; set; } - - } - - /// - /// Extract color plane(s) from an image. Options include scaling, offset and conversion to floating point. 
- /// - [Obsolete] - public sealed partial class ImagePixelExtractor : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public ImagePixelExtractor() - { - } - - public ImagePixelExtractor(params string[] inputColumns) - { - if (inputColumns != null) - { - foreach (string input in inputColumns) - { - AddColumn(input); - } - } - } - - public ImagePixelExtractor(params (string inputColumn, string outputColumn)[] inputOutputColumns) - { - if (inputOutputColumns != null) - { - foreach (var inputOutput in inputOutputColumns) - { - AddColumn(inputOutput.outputColumn, inputOutput.inputColumn); - } - } - } - - public void AddColumn(string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(inputColumn)); - Column = list.ToArray(); - } - - public void AddColumn(string outputColumn, string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(outputColumn, inputColumn)); - Column = list.ToArray(); - } - - - /// - /// New column definition(s) (optional form: name:src) - /// - [Obsolete] - public ImagePixelExtractorTransformColumn[] Column { get; set; } - - /// - /// Whether to use alpha channel - /// - [Obsolete] - public bool UseAlpha { get; set; } = false; - - /// - /// Whether to use red channel - /// - [Obsolete] - public bool UseRed { get; set; } = true; - - /// - /// Whether to use green channel - /// - [Obsolete] - public bool UseGreen { get; set; } = true; - - /// - /// Whether to use blue channel - /// - [Obsolete] - public bool UseBlue { get; set; } = true; - - /// - /// Whether to separate each channel or interleave in ARGB order - /// - [Obsolete] - public bool InterleaveArgb { get; set; } = false; - - /// - /// Whether to convert to floating point - /// - [Obsolete] - public bool Convert { get; set; } = true; - - /// - /// Offset (pre-scale) - /// - [Obsolete] - public float? Offset { get; set; } - - /// - /// Scale factor - /// - [Obsolete] - public float? 
Scale { get; set; } - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(ImagePixelExtractor)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new ImagePixelExtractorPipelineStep(output); - } - - [Obsolete] - private class ImagePixelExtractorPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public ImagePixelExtractorPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - [Obsolete] - public enum ImageResizerTransformResizingKind : byte - { - IsoPad = 0, - IsoCrop = 1 - } - - [Obsolete] - public enum ImageResizerTransformAnchor : byte - { - Right = 0, - Left = 1, - Top = 2, - Bottom = 3, - Center = 4 - } - - - [Obsolete] - public sealed partial class ImageResizerTransformColumn : OneToOneColumn, IOneToOneColumn - { - /// - /// Width of the resized image - /// - [Obsolete] - public int? ImageWidth { get; set; } - - /// - /// Height of the resized image - /// - [Obsolete] - public int? ImageHeight { get; set; } - - /// - /// Resizing method - /// - [Obsolete] - public ImageResizerTransformResizingKind? Resizing { get; set; } - - /// - /// Anchor for cropping - /// - [Obsolete] - public ImageResizerTransformAnchor? CropAnchor { get; set; } - - /// - /// Name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Name of the source column - /// - [Obsolete] - public string Source { get; set; } - - } - - /// - /// Scales an image to specified dimensions using one of the three scale types: isotropic with padding, isotropic with cropping or anisotropic. In case of isotropic padding, transparent color is used to pad resulting image. - /// - [Obsolete] - public sealed partial class ImageResizer : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public ImageResizer() - { - } - - public ImageResizer(params string[] inputColumns) - { - if (inputColumns != null) - { - foreach (string input in inputColumns) - { - AddColumn(input); - } - } - } - - public ImageResizer(params (string inputColumn, string outputColumn)[] inputOutputColumns) - { - if (inputOutputColumns != null) - { - foreach (var inputOutput in inputOutputColumns) - { - AddColumn(inputOutput.outputColumn, inputOutput.inputColumn); - } - } - } - - public void AddColumn(string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(inputColumn)); - Column = list.ToArray(); - } - - public void AddColumn(string outputColumn, string inputColumn) - { - var list = Column == null ? 
new List() : new List(Column); - list.Add(OneToOneColumn.Create(outputColumn, inputColumn)); - Column = list.ToArray(); - } - - - /// - /// New column definition(s) (optional form: name:src) - /// - [Obsolete] - public ImageResizerTransformColumn[] Column { get; set; } - - /// - /// Resized width of the image - /// - [Obsolete] - public int ImageWidth { get; set; } - - /// - /// Resized height of the image - /// - [Obsolete] - public int ImageHeight { get; set; } - - /// - /// Resizing method - /// - [Obsolete] - public ImageResizerTransformResizingKind Resizing { get; set; } = ImageResizerTransformResizingKind.IsoCrop; - - /// - /// Anchor for cropping - /// - [Obsolete] - public ImageResizerTransformAnchor CropAnchor { get; set; } = ImageResizerTransformAnchor.Center; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(ImageResizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new ImageResizerPipelineStep(output); - } - - [Obsolete] - private class ImageResizerPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public ImageResizerPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - [Obsolete] - public sealed partial class KeyToValueMappingTransformerColumn : OneToOneColumn, IOneToOneColumn - { - /// - /// Name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Name of the source column - /// - [Obsolete] - public string Source { get; set; } - - } - - /// - [Obsolete] - public sealed partial class KeyToTextConverter : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public KeyToTextConverter() - { - } - - public KeyToTextConverter(params string[] inputColumns) - { - if (inputColumns != null) - { - foreach (string input in inputColumns) - { - AddColumn(input); - } - } - } - - public KeyToTextConverter(params (string inputColumn, string outputColumn)[] inputOutputColumns) - { - if (inputOutputColumns != null) - { - foreach (var inputOutput in inputOutputColumns) - { - AddColumn(inputOutput.outputColumn, inputOutput.inputColumn); - } - } - } - - public void AddColumn(string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(inputColumn)); - Column = list.ToArray(); - } - - public void AddColumn(string outputColumn, string inputColumn) - { - var list = Column == null ? 
new List() : new List(Column); - list.Add(OneToOneColumn.Create(outputColumn, inputColumn)); - Column = list.ToArray(); - } - - - /// - /// New column definition(s) (optional form: name:src) - /// - [Obsolete] - public KeyToValueMappingTransformerColumn[] Column { get; set; } - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(KeyToTextConverter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new KeyToTextConverterPipelineStep(output); - } - - [Obsolete] - private class KeyToTextConverterPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public KeyToTextConverterPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - /// - /// Transforms the label to either key or bool (if needed) to make it suitable for classification. - /// - [Obsolete] - public sealed partial class LabelColumnKeyBooleanConverter : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Convert the key values to text - /// - [Obsolete] - public bool TextKeyValues { get; set; } = true; - - /// - /// The label column - /// - [Obsolete] - public string LabelColumn { get; set; } - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(LabelColumnKeyBooleanConverter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new LabelColumnKeyBooleanConverterPipelineStep(output); - } - - [Obsolete] - private class LabelColumnKeyBooleanConverterPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public LabelColumnKeyBooleanConverterPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - [Obsolete] - public sealed partial class LabelIndicatorTransformColumn : OneToOneColumn, IOneToOneColumn - { - /// - /// The positive example class for binary classification. 
- /// - [Obsolete] - public int? ClassIndex { get; set; } - - /// - /// Name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Name of the source column - /// - [Obsolete] - public string Source { get; set; } - - } - - /// - /// Label remapper used by OVA - /// - [Obsolete] - public sealed partial class LabelIndicator : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public LabelIndicator() - { - } - - public LabelIndicator(params string[] inputColumns) - { - if (inputColumns != null) - { - foreach (string input in inputColumns) - { - AddColumn(input); - } - } - } - - public LabelIndicator(params (string inputColumn, string outputColumn)[] inputOutputColumns) - { - if (inputOutputColumns != null) - { - foreach (var inputOutput in inputOutputColumns) - { - AddColumn(inputOutput.outputColumn, inputOutput.inputColumn); - } - } - } - - public void AddColumn(string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(inputColumn)); - Column = list.ToArray(); - } - - public void AddColumn(string outputColumn, string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(outputColumn, inputColumn)); - Column = list.ToArray(); - } - - - /// - /// New column definition(s) (optional form: name:src) - /// - [Obsolete] - public LabelIndicatorTransformColumn[] Column { get; set; } - - /// - /// Label of the positive class. - /// - [Obsolete] - public int ClassIndex { get; set; } - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(LabelIndicator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new LabelIndicatorPipelineStep(output); - } - - [Obsolete] - private class LabelIndicatorPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public LabelIndicatorPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - /// - /// Transforms the label to float to make it suitable for regression. 
- /// - [Obsolete] - public sealed partial class LabelToFloatConverter : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// The label column - /// - [Obsolete] - public string LabelColumn { get; set; } - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(LabelToFloatConverter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new LabelToFloatConverterPipelineStep(output); - } - - [Obsolete] - private class LabelToFloatConverterPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public LabelToFloatConverterPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - [Obsolete] - public sealed partial class LatentDirichletAllocationTransformerColumn : OneToOneColumn, IOneToOneColumn - { - /// - /// The number of topics - /// - [Obsolete] - public int? NumTopic { get; set; } - - /// - /// Dirichlet prior on document-topic vectors - /// - [Obsolete] - public float? AlphaSum { get; set; } - - /// - /// Dirichlet prior on vocab-topic vectors - /// - [Obsolete] - public float? Beta { get; set; } - - /// - /// Number of Metropolis Hasting step - /// - [Obsolete] - public int? Mhstep { get; set; } - - /// - /// Number of iterations - /// - [Obsolete] - public int? NumIterations { get; set; } - - /// - /// Compute log likelihood over local dataset on this iteration interval - /// - [Obsolete] - public int? LikelihoodInterval { get; set; } - - /// - /// The number of training threads - /// - [Obsolete] - public int? NumThreads { get; set; } - - /// - /// The threshold of maximum count of tokens per doc - /// - [Obsolete] - public int? NumMaxDocToken { get; set; } - - /// - /// The number of words to summarize the topic - /// - [Obsolete] - public int? NumSummaryTermPerTopic { get; set; } - - /// - /// The number of burn-in iterations - /// - [Obsolete] - public int? NumBurninIterations { get; set; } = 10; - - /// - /// Reset the random number generator for each document - /// - [Obsolete] - public bool? 
ResetRandomGenerator { get; set; } - - /// - /// Name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Name of the source column - /// - [Obsolete] - public string Source { get; set; } - - } - - /// - /// - [Obsolete] - public sealed partial class LightLda : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public LightLda() - { - } - - public LightLda(params string[] inputColumns) - { - if (inputColumns != null) - { - foreach (string input in inputColumns) - { - AddColumn(input); - } - } - } - - public LightLda(params (string inputColumn, string outputColumn)[] inputOutputColumns) - { - if (inputOutputColumns != null) - { - foreach (var inputOutput in inputOutputColumns) - { - AddColumn(inputOutput.outputColumn, inputOutput.inputColumn); - } - } - } - - public void AddColumn(string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(inputColumn)); - Column = list.ToArray(); - } - - public void AddColumn(string outputColumn, string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(outputColumn, inputColumn)); - Column = list.ToArray(); - } - - - /// - /// New column definition(s) (optional form: name:srcs) - /// - [Obsolete] - public LatentDirichletAllocationTransformerColumn[] Column { get; set; } - - /// - /// The number of topics - /// - [TlcModule.SweepableDiscreteParamAttribute("NumTopic", new object[]{20, 40, 100, 200})] - [Obsolete] - public int NumTopic { get; set; } = 100; - - /// - /// Dirichlet prior on document-topic vectors - /// - [TlcModule.SweepableDiscreteParamAttribute("AlphaSum", new object[]{1, 10, 100, 200})] - [Obsolete] - public float AlphaSum { get; set; } = 100f; - - /// - /// Dirichlet prior on vocab-topic vectors - /// - [TlcModule.SweepableDiscreteParamAttribute("Beta", new object[]{0.01f, 0.015f, 0.07f, 0.02f})] - [Obsolete] - public float Beta { get; set; } = 0.01f; - - /// - /// Number of Metropolis Hasting step - /// - [TlcModule.SweepableDiscreteParamAttribute("Mhstep", new object[]{2, 4, 8, 16})] - [Obsolete] - public int Mhstep { get; set; } = 4; - - /// - /// Number of iterations - /// - [TlcModule.SweepableDiscreteParamAttribute("NumIterations", new object[]{100, 200, 300, 400})] - [Obsolete] - public int NumIterations { get; set; } = 200; - - /// - /// Compute log likelihood over local dataset on this iteration interval - /// - [Obsolete] - public int LikelihoodInterval { get; set; } = 5; - - /// - /// The number of training threads. Default value depends on number of logical processors. 
- /// - [Obsolete] - public int NumThreads { get; set; } - - /// - /// The threshold of maximum count of tokens per doc - /// - [Obsolete] - public int NumMaxDocToken { get; set; } = 512; - - /// - /// The number of words to summarize the topic - /// - [Obsolete] - public int NumSummaryTermPerTopic { get; set; } = 10; - - /// - /// The number of burn-in iterations - /// - [TlcModule.SweepableDiscreteParamAttribute("NumBurninIterations", new object[]{10, 20, 30, 40})] - [Obsolete] - public int NumBurninIterations { get; set; } = 10; - - /// - /// Reset the random number generator for each document - /// - [Obsolete] - public bool ResetRandomGenerator { get; set; } = false; - - /// - /// Whether to output the topic-word summary in text format - /// - [Obsolete] - public bool OutputTopicWordSummary { get; set; } = false; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(LightLda)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new LightLdaPipelineStep(output); - } - - [Obsolete] - private class LightLdaPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public LightLdaPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - [Obsolete] - public sealed partial class NormalizeTransformLogNormalColumn : OneToOneColumn, IOneToOneColumn - { - /// - /// Max number of examples used to train the normalizer - /// - [Obsolete] - public long? MaxTrainingExamples { get; set; } - - /// - /// Name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Name of the source column - /// - [Obsolete] - public string Source { get; set; } - - } - - /// - /// Normalizes the data based on the computed mean and variance of the logarithm of the data. - /// - [Obsolete] - public sealed partial class LogMeanVarianceNormalizer : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public LogMeanVarianceNormalizer() - { - } - - public LogMeanVarianceNormalizer(params string[] inputColumns) - { - if (inputColumns != null) - { - foreach (string input in inputColumns) - { - AddColumn(input); - } - } - } - - public LogMeanVarianceNormalizer(params (string inputColumn, string outputColumn)[] inputOutputColumns) - { - if (inputOutputColumns != null) - { - foreach (var inputOutput in inputOutputColumns) - { - AddColumn(inputOutput.outputColumn, inputOutput.inputColumn); - } - } - } - - public void AddColumn(string inputColumn) - { - var list = Column == null ? 
new List() : new List(Column); - list.Add(OneToOneColumn.Create(inputColumn)); - Column = list.ToArray(); - } - - public void AddColumn(string outputColumn, string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(outputColumn, inputColumn)); - Column = list.ToArray(); - } - - - /// - /// Whether to use CDF as the output - /// - [Obsolete] - public bool UseCdf { get; set; } = true; - - /// - /// New column definition(s) (optional form: name:src) - /// - [Obsolete] - public NormalizeTransformLogNormalColumn[] Column { get; set; } - - /// - /// Max number of examples used to train the normalizer - /// - [Obsolete] - public long MaxTrainingExamples { get; set; } = 1000000000; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(LogMeanVarianceNormalizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new LogMeanVarianceNormalizerPipelineStep(output); - } - - [Obsolete] - private class LogMeanVarianceNormalizerPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public LogMeanVarianceNormalizerPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - [Obsolete] - public enum LpNormalizingEstimatorBaseNormalizerKind : byte - { - L2Norm = 0, - StdDev = 1, - L1Norm = 2, - LInf = 3 - } - - - [Obsolete] - public sealed partial class LpNormalizingTransformerColumn : OneToOneColumn, IOneToOneColumn - { - /// - /// The norm to use to normalize each sample - /// - [Obsolete] - public LpNormalizingEstimatorBaseNormalizerKind? NormKind { get; set; } - - /// - /// Subtract mean from each value before normalizing - /// - [Obsolete] - public bool? SubMean { get; set; } - - /// - /// Name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Name of the source column - /// - [Obsolete] - public string Source { get; set; } - - } - - /// - [Obsolete] - public sealed partial class LpNormalizer : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public LpNormalizer() - { - } - - public LpNormalizer(params string[] inputColumns) - { - if (inputColumns != null) - { - foreach (string input in inputColumns) - { - AddColumn(input); - } - } - } - - public LpNormalizer(params (string inputColumn, string outputColumn)[] inputOutputColumns) - { - if (inputOutputColumns != null) - { - foreach (var inputOutput in inputOutputColumns) - { - AddColumn(inputOutput.outputColumn, inputOutput.inputColumn); - } - } - } - - public void AddColumn(string inputColumn) - { - var list = Column == null ? 
new List() : new List(Column); - list.Add(OneToOneColumn.Create(inputColumn)); - Column = list.ToArray(); - } - - public void AddColumn(string outputColumn, string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(outputColumn, inputColumn)); - Column = list.ToArray(); - } - - - /// - /// New column definition(s) (optional form: name:src) - /// - [Obsolete] - public LpNormalizingTransformerColumn[] Column { get; set; } - - /// - /// The norm to use to normalize each sample - /// - [Obsolete] - public LpNormalizingEstimatorBaseNormalizerKind NormKind { get; set; } = LpNormalizingEstimatorBaseNormalizerKind.L2Norm; - - /// - /// Subtract mean from each value before normalizing - /// - [Obsolete] - public bool SubMean { get; set; } = false; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(LpNormalizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new LpNormalizerPipelineStep(output); - } - - [Obsolete] - private class LpNormalizerPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public LpNormalizerPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - /// - /// Combines a sequence of TransformModels and a PredictorModel into a single PredictorModel. - /// - [Obsolete] - public sealed partial class ManyHeterogeneousModelCombiner - { - - - /// - /// Transform model - /// - [Obsolete] - public ArrayVar TransformModels { get; set; } = new ArrayVar(); - - /// - /// Predictor model - /// - [Obsolete] - public Var PredictorModel { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output - { - /// - /// Predictor model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - } - } - - namespace Legacy.Transforms - { - - /// - /// Normalizes the data based on the computed mean and variance of the data. - /// - [Obsolete] - public sealed partial class MeanVarianceNormalizer : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public MeanVarianceNormalizer() - { - } - - public MeanVarianceNormalizer(params string[] inputColumns) - { - if (inputColumns != null) - { - foreach (string input in inputColumns) - { - AddColumn(input); - } - } - } - - public MeanVarianceNormalizer(params (string inputColumn, string outputColumn)[] inputOutputColumns) - { - if (inputOutputColumns != null) - { - foreach (var inputOutput in inputOutputColumns) - { - AddColumn(inputOutput.outputColumn, inputOutput.inputColumn); - } - } - } - - public void AddColumn(string inputColumn) - { - var list = Column == null ? 
new List() : new List(Column); - list.Add(OneToOneColumn.Create(inputColumn)); - Column = list.ToArray(); - } - - public void AddColumn(string outputColumn, string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(outputColumn, inputColumn)); - Column = list.ToArray(); - } - - - /// - /// Whether to use CDF as the output - /// - [Obsolete] - public bool UseCdf { get; set; } = false; - - /// - /// New column definition(s) (optional form: name:src) - /// - [Obsolete] - public NormalizeTransformAffineColumn[] Column { get; set; } - - /// - /// Whether to map zero to zero, preserving sparsity - /// - [Obsolete] - public bool FixZero { get; set; } = true; - - /// - /// Max number of examples used to train the normalizer - /// - [Obsolete] - public long MaxTrainingExamples { get; set; } = 1000000000; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(MeanVarianceNormalizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new MeanVarianceNormalizerPipelineStep(output); - } - - [Obsolete] - private class MeanVarianceNormalizerPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public MeanVarianceNormalizerPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - /// - /// Normalizes the data based on the observed minimum and maximum values of the data. - /// - [Obsolete] - public sealed partial class MinMaxNormalizer : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public MinMaxNormalizer() - { - } - - public MinMaxNormalizer(params string[] inputColumns) - { - if (inputColumns != null) - { - foreach (string input in inputColumns) - { - AddColumn(input); - } - } - } - - public MinMaxNormalizer(params (string inputColumn, string outputColumn)[] inputOutputColumns) - { - if (inputOutputColumns != null) - { - foreach (var inputOutput in inputOutputColumns) - { - AddColumn(inputOutput.outputColumn, inputOutput.inputColumn); - } - } - } - - public void AddColumn(string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(inputColumn)); - Column = list.ToArray(); - } - - public void AddColumn(string outputColumn, string inputColumn) - { - var list = Column == null ? 
new List() : new List(Column); - list.Add(OneToOneColumn.Create(outputColumn, inputColumn)); - Column = list.ToArray(); - } - - - /// - /// New column definition(s) (optional form: name:src) - /// - [Obsolete] - public NormalizeTransformAffineColumn[] Column { get; set; } - - /// - /// Whether to map zero to zero, preserving sparsity - /// - [Obsolete] - public bool FixZero { get; set; } = true; - - /// - /// Max number of examples used to train the normalizer - /// - [Obsolete] - public long MaxTrainingExamples { get; set; } = 1000000000; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(MinMaxNormalizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new MinMaxNormalizerPipelineStep(output); - } - - [Obsolete] - private class MinMaxNormalizerPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public MinMaxNormalizerPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - [Obsolete] - public enum MissingValueHandlingTransformerReplacementKind : byte - { - DefaultValue = 0, - Mean = 1, - Minimum = 2, - Maximum = 3 - } - - - [Obsolete] - public sealed partial class MissingValueHandlingTransformerColumn : OneToOneColumn, IOneToOneColumn - { - /// - /// The replacement method to utilize - /// - [Obsolete] - public MissingValueHandlingTransformerReplacementKind? Kind { get; set; } - - /// - /// Whether to impute values by slot - /// - [Obsolete] - public bool? ImputeBySlot { get; set; } - - /// - /// Whether or not to concatenate an indicator vector column to the value column - /// - [Obsolete] - public bool? ConcatIndicator { get; set; } - - /// - /// Name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Name of the source column - /// - [Obsolete] - public string Source { get; set; } - - } - - /// - /// - [Obsolete] - public sealed partial class MissingValueHandler : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public MissingValueHandler() - { - } - - public MissingValueHandler(params string[] inputColumns) - { - if (inputColumns != null) - { - foreach (string input in inputColumns) - { - AddColumn(input); - } - } - } - - public MissingValueHandler(params (string inputColumn, string outputColumn)[] inputOutputColumns) - { - if (inputOutputColumns != null) - { - foreach (var inputOutput in inputOutputColumns) - { - AddColumn(inputOutput.outputColumn, inputOutput.inputColumn); - } - } - } - - public void AddColumn(string inputColumn) - { - var list = Column == null ? 
new List() : new List(Column); - list.Add(OneToOneColumn.Create(inputColumn)); - Column = list.ToArray(); - } - - public void AddColumn(string outputColumn, string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(outputColumn, inputColumn)); - Column = list.ToArray(); - } - - - /// - /// New column definition(s) (optional form: name:rep:src) - /// - [Obsolete] - public MissingValueHandlingTransformerColumn[] Column { get; set; } - - /// - /// The replacement method to utilize - /// - [Obsolete] - public MissingValueHandlingTransformerReplacementKind ReplaceWith { get; set; } = MissingValueHandlingTransformerReplacementKind.DefaultValue; - - /// - /// Whether to impute values by slot - /// - [Obsolete] - public bool ImputeBySlot { get; set; } = true; - - /// - /// Whether or not to concatenate an indicator vector column to the value column - /// - [Obsolete] - public bool Concat { get; set; } = true; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(MissingValueHandler)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new MissingValueHandlerPipelineStep(output); - } - - [Obsolete] - private class MissingValueHandlerPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public MissingValueHandlerPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - [Obsolete] - public sealed partial class MissingValueIndicatorTransformerColumn : OneToOneColumn, IOneToOneColumn - { - /// - /// Name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Name of the source column - /// - [Obsolete] - public string Source { get; set; } - - } - - /// - /// - [Obsolete] - public sealed partial class MissingValueIndicator : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public MissingValueIndicator() - { - } - - public MissingValueIndicator(params string[] inputColumns) - { - if (inputColumns != null) - { - foreach (string input in inputColumns) - { - AddColumn(input); - } - } - } - - public MissingValueIndicator(params (string inputColumn, string outputColumn)[] inputOutputColumns) - { - if (inputOutputColumns != null) - { - foreach (var inputOutput in inputOutputColumns) - { - AddColumn(inputOutput.outputColumn, inputOutput.inputColumn); - } - } - } - - public void AddColumn(string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(inputColumn)); - Column = list.ToArray(); - } - - public void AddColumn(string outputColumn, string inputColumn) - { - var list = Column == null ? 
new List() : new List(Column); - list.Add(OneToOneColumn.Create(outputColumn, inputColumn)); - Column = list.ToArray(); - } - - - /// - /// New column definition(s) (optional form: name:src) - /// - [Obsolete] - public MissingValueIndicatorTransformerColumn[] Column { get; set; } - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(MissingValueIndicator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new MissingValueIndicatorPipelineStep(output); - } - - [Obsolete] - private class MissingValueIndicatorPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public MissingValueIndicatorPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - [Obsolete] - public sealed partial class MissingValueDroppingTransformerColumn : OneToOneColumn, IOneToOneColumn - { - /// - /// Name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Name of the source column - /// - [Obsolete] - public string Source { get; set; } - - } - - /// - /// - [Obsolete] - public sealed partial class MissingValuesDropper : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public MissingValuesDropper() - { - } - - public MissingValuesDropper(params string[] inputColumns) - { - if (inputColumns != null) - { - foreach (string input in inputColumns) - { - AddColumn(input); - } - } - } - - public MissingValuesDropper(params (string inputColumn, string outputColumn)[] inputOutputColumns) - { - if (inputOutputColumns != null) - { - foreach (var inputOutput in inputOutputColumns) - { - AddColumn(inputOutput.outputColumn, inputOutput.inputColumn); - } - } - } - - public void AddColumn(string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(inputColumn)); - Column = list.ToArray(); - } - - public void AddColumn(string outputColumn, string inputColumn) - { - var list = Column == null ? 
new List() : new List(Column); - list.Add(OneToOneColumn.Create(outputColumn, inputColumn)); - Column = list.ToArray(); - } - - - /// - /// Columns to drop the NAs for - /// - [Obsolete] - public MissingValueDroppingTransformerColumn[] Column { get; set; } - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(MissingValuesDropper)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new MissingValuesDropperPipelineStep(output); - } - - [Obsolete] - private class MissingValuesDropperPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public MissingValuesDropperPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - /// - /// - [Obsolete] - public sealed partial class MissingValuesRowDropper : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Column - /// - [Obsolete] - public string[] Column { get; set; } - - /// - /// If true, keep only rows that contain NA values, and filter the rest. 
- /// - [Obsolete] - public bool Complement { get; set; } = false; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(MissingValuesRowDropper)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new MissingValuesRowDropperPipelineStep(output); - } - - [Obsolete] - private class MissingValuesRowDropperPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public MissingValuesRowDropperPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - [Obsolete] - public enum MissingValueReplacingTransformerReplacementKind : byte - { - DefaultValue = 0, - Mean = 1, - Minimum = 2, - Maximum = 3, - SpecifiedValue = 4 - } - - - [Obsolete] - public sealed partial class MissingValueReplacingTransformerColumn : OneToOneColumn, IOneToOneColumn - { - /// - /// Replacement value for NAs (uses default value if not given) - /// - [Obsolete] - public string ReplacementString { get; set; } - - /// - /// The replacement method to utilize - /// - [Obsolete] - public MissingValueReplacingTransformerReplacementKind? Kind { get; set; } - - /// - /// Whether to impute values by slot - /// - [Obsolete] - public bool? Slot { get; set; } - - /// - /// Name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Name of the source column - /// - [Obsolete] - public string Source { get; set; } - - } - - /// - /// - [Obsolete] - public sealed partial class MissingValueSubstitutor : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public MissingValueSubstitutor() - { - } - - public MissingValueSubstitutor(params string[] inputColumns) - { - if (inputColumns != null) - { - foreach (string input in inputColumns) - { - AddColumn(input); - } - } - } - - public MissingValueSubstitutor(params (string inputColumn, string outputColumn)[] inputOutputColumns) - { - if (inputOutputColumns != null) - { - foreach (var inputOutput in inputOutputColumns) - { - AddColumn(inputOutput.outputColumn, inputOutput.inputColumn); - } - } - } - - public void AddColumn(string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(inputColumn)); - Column = list.ToArray(); - } - - public void AddColumn(string outputColumn, string inputColumn) - { - var list = Column == null ? 
new List() : new List(Column); - list.Add(OneToOneColumn.Create(outputColumn, inputColumn)); - Column = list.ToArray(); - } - - - /// - /// New column definition(s) (optional form: name:rep:src) - /// - [Obsolete] - public MissingValueReplacingTransformerColumn[] Column { get; set; } - - /// - /// The replacement method to utilize - /// - [Obsolete] - public MissingValueReplacingTransformerReplacementKind ReplacementKind { get; set; } = MissingValueReplacingTransformerReplacementKind.DefaultValue; - - /// - /// Whether to impute values by slot - /// - [Obsolete] - public bool ImputeBySlot { get; set; } = true; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(MissingValueSubstitutor)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new MissingValueSubstitutorPipelineStep(output); - } - - [Obsolete] - private class MissingValueSubstitutorPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public MissingValueSubstitutorPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - /// - /// Combines a sequence of TransformModels into a single model - /// - [Obsolete] - public sealed partial class ModelCombiner - { - - - /// - /// Input models - /// - [Obsolete] - public ArrayVar Models { get; set; } = new ArrayVar(); - - - [Obsolete] - public sealed class Output - { - /// - /// Combined model - /// - public Var OutputModel { get; set; } = new Var(); - - } - } - } - - namespace Legacy.Transforms - { - [Obsolete] - public enum NgramExtractingEstimatorWeightingCriteria - { - Tf = 0, - Idf = 1, - TfIdf = 2 - } - - - [Obsolete] - public sealed partial class NgramExtractingTransformerColumn : OneToOneColumn, IOneToOneColumn - { - /// - /// Maximum ngram length - /// - [Obsolete] - public int? NgramLength { get; set; } - - /// - /// Whether to include all ngram lengths up to NgramLength or only NgramLength - /// - [Obsolete] - public bool? AllLengths { get; set; } - - /// - /// Maximum number of tokens to skip when constructing an ngram - /// - [Obsolete] - public int? SkipLength { get; set; } - - /// - /// Maximum number of ngrams to store in the dictionary - /// - [Obsolete] - public int[] MaxNumTerms { get; set; } - - /// - /// Statistical measure used to evaluate how important a word is to a document in a corpus - /// - [Obsolete] - public NgramExtractingEstimatorWeightingCriteria? 
Weighting { get; set; } - - /// - /// Name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Name of the source column - /// - [Obsolete] - public string Source { get; set; } - - } - - /// - [Obsolete] - public sealed partial class NGramTranslator : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public NGramTranslator() - { - } - - public NGramTranslator(params string[] inputColumns) - { - if (inputColumns != null) - { - foreach (string input in inputColumns) - { - AddColumn(input); - } - } - } - - public NGramTranslator(params (string inputColumn, string outputColumn)[] inputOutputColumns) - { - if (inputOutputColumns != null) - { - foreach (var inputOutput in inputOutputColumns) - { - AddColumn(inputOutput.outputColumn, inputOutput.inputColumn); - } - } - } - - public void AddColumn(string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(inputColumn)); - Column = list.ToArray(); - } - - public void AddColumn(string outputColumn, string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(outputColumn, inputColumn)); - Column = list.ToArray(); - } - - - /// - /// New column definition(s) (optional form: name:src) - /// - [Obsolete] - public NgramExtractingTransformerColumn[] Column { get; set; } - - /// - /// Maximum ngram length - /// - [Obsolete] - public int NgramLength { get; set; } = 2; - - /// - /// Whether to store all ngram lengths up to ngramLength, or only ngramLength - /// - [Obsolete] - public bool AllLengths { get; set; } = true; - - /// - /// Maximum number of tokens to skip when constructing an ngram - /// - [Obsolete] - public int SkipLength { get; set; } - - /// - /// Maximum number of ngrams to store in the dictionary - /// - [Obsolete] - public int[] MaxNumTerms { get; set; } = { 10000000 }; - - /// - /// The weighting criteria - /// - [Obsolete] - public NgramExtractingEstimatorWeightingCriteria Weighting { get; set; } = NgramExtractingEstimatorWeightingCriteria.Tf; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(NGramTranslator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new NGramTranslatorPipelineStep(output); - } - - [Obsolete] - private class NGramTranslatorPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public NGramTranslatorPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - /// - /// Does nothing. 
- /// - [Obsolete] - public sealed partial class NoOperation : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(NoOperation)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new NoOperationPipelineStep(output); - } - - [Obsolete] - private class NoOperationPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public NoOperationPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - /// - /// - [Obsolete] - public sealed partial class OptionalColumnCreator : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// New column definition(s) - /// - [Obsolete] - public string[] Column { get; set; } - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(OptionalColumnCreator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new OptionalColumnCreatorPipelineStep(output); - } - - [Obsolete] - private class OptionalColumnCreatorPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public OptionalColumnCreatorPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - [Obsolete] - public sealed partial class PcaTransformColumn : OneToOneColumn, IOneToOneColumn - { - /// - /// The name of the weight column - /// - [Obsolete] - public string WeightColumn { get; set; } - - /// - /// The number of components in the PCA - /// - [Obsolete] - public int? Rank { get; set; } - - /// - /// Oversampling parameter for randomized PCA training - /// - [Obsolete] - public int? Oversampling { get; set; } - - /// - /// If enabled, data is centered to be zero mean - /// - [Obsolete] - public bool? 
Center { get; set; } - - /// - /// The seed for random number generation - /// - [Obsolete] - public int? Seed { get; set; } - - /// - /// Name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Name of the source column - /// - [Obsolete] - public string Source { get; set; } - - } - - /// - /// - [Obsolete] - public sealed partial class PcaCalculator : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public PcaCalculator() - { - } - - public PcaCalculator(params string[] inputColumns) - { - if (inputColumns != null) - { - foreach (string input in inputColumns) - { - AddColumn(input); - } - } - } - - public PcaCalculator(params (string inputColumn, string outputColumn)[] inputOutputColumns) - { - if (inputOutputColumns != null) - { - foreach (var inputOutput in inputOutputColumns) - { - AddColumn(inputOutput.outputColumn, inputOutput.inputColumn); - } - } - } - - public void AddColumn(string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(inputColumn)); - Column = list.ToArray(); - } - - public void AddColumn(string outputColumn, string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(outputColumn, inputColumn)); - Column = list.ToArray(); - } - - - /// - /// New column definition(s) (optional form: name:src) - /// - [Obsolete] - public PcaTransformColumn[] Column { get; set; } - - /// - /// The name of the weight column - /// - [Obsolete] - public string WeightColumn { get; set; } - - /// - /// The number of components in the PCA - /// - [Obsolete] - public int Rank { get; set; } = 20; - - /// - /// Oversampling parameter for randomized PCA training - /// - [Obsolete] - public int Oversampling { get; set; } = 20; - - /// - /// If enabled, data is centered to be zero mean - /// - [Obsolete] - public bool Center { get; set; } = true; - - /// - /// The seed for random number generation - /// - [Obsolete] - public int Seed { get; set; } - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(PcaCalculator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new PcaCalculatorPipelineStep(output); - } - - [Obsolete] - private class PcaCalculatorPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public PcaCalculatorPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - /// - /// Transforms a predicted label column to its original values, unless it is of type bool. 
- /// - [Obsolete] - public sealed partial class PredictedLabelColumnOriginalValueConverter : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// The predicted label column - /// - [Obsolete] - public string PredictedLabelColumn { get; set; } - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(PredictedLabelColumnOriginalValueConverter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new PredictedLabelColumnOriginalValueConverterPipelineStep(output); - } - - [Obsolete] - private class PredictedLabelColumnOriginalValueConverterPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public PredictedLabelColumnOriginalValueConverterPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - [Obsolete] - public sealed partial class GenerateNumberTransformColumn - { - /// - /// Name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Use an auto-incremented integer starting at zero instead of a random number - /// - [Obsolete] - public bool? UseCounter { get; set; } - - /// - /// The random seed - /// - [Obsolete] - public uint? Seed { get; set; } - - } - - /// - /// Adds a column with a generated number sequence. 
- /// - [Obsolete] - public sealed partial class RandomNumberGenerator : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// New column definition(s) (optional form: name:seed) - /// - [Obsolete] - public GenerateNumberTransformColumn[] Column { get; set; } - - /// - /// Use an auto-incremented integer starting at zero instead of a random number - /// - [Obsolete] - public bool UseCounter { get; set; } = false; - - /// - /// The random seed - /// - [Obsolete] - public uint Seed { get; set; } = 42; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(RandomNumberGenerator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new RandomNumberGeneratorPipelineStep(output); - } - - [Obsolete] - private class RandomNumberGeneratorPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public RandomNumberGeneratorPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - /// - /// Filters a dataview on a column of type Single, Double or Key (contiguous). Keeps the values that are in the specified min/max range. NaNs are always filtered out. If the input is a Key type, the min/max are considered percentages of the number of values. - /// - [Obsolete] - public sealed partial class RowRangeFilter : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Column - /// - [Obsolete] - public string Column { get; set; } - - /// - /// Minimum value (0 to 1 for key types) - /// - [Obsolete] - public double? Min { get; set; } - - /// - /// Maximum value (0 to 1 for key types) - /// - [Obsolete] - public double? Max { get; set; } - - /// - /// If true, keep the values that fall outside the range. - /// - [Obsolete] - public bool Complement { get; set; } = false; - - /// - /// If true, include in the range the values that are equal to min. - /// - [Obsolete] - public bool IncludeMin { get; set; } = true; - - /// - /// If true, include in the range the values that are equal to max. - /// - [Obsolete] - public bool? 
IncludeMax { get; set; } - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(RowRangeFilter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new RowRangeFilterPipelineStep(output); - } - - [Obsolete] - private class RowRangeFilterPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public RowRangeFilterPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - /// - /// Allows limiting input to a subset of rows at an optional offset. Can be used to implement data paging. - /// - [Obsolete] - public sealed partial class RowSkipAndTakeFilter : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Number of items to skip - /// - [Obsolete] - public long? Skip { get; set; } - - /// - /// Number of items to take - /// - [Obsolete] - public long? Take { get; set; } - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(RowSkipAndTakeFilter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new RowSkipAndTakeFilterPipelineStep(output); - } - - [Obsolete] - private class RowSkipAndTakeFilterPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public RowSkipAndTakeFilterPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - /// - /// Allows limiting input to a subset of rows by skipping a number of rows. 
- /// - [Obsolete] - public sealed partial class RowSkipFilter : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Number of items to skip - /// - [Obsolete] - public long Count { get; set; } - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(RowSkipFilter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new RowSkipFilterPipelineStep(output); - } - - [Obsolete] - private class RowSkipFilterPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public RowSkipFilterPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - /// - /// Allows limiting input to a subset of rows by taking N first rows. - /// - [Obsolete] - public sealed partial class RowTakeFilter : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Number of items to take - /// - [Obsolete] - public long Count { get; set; } = 9223372036854775807; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(RowTakeFilter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new RowTakeFilterPipelineStep(output); - } - - [Obsolete] - private class RowTakeFilterPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public RowTakeFilterPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - /// - /// Selects only the last score columns and the extra columns specified in the arguments. 
- /// - [Obsolete] - public sealed partial class ScoreColumnSelector : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Extra columns to write - /// - [Obsolete] - public string[] ExtraColumns { get; set; } - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(ScoreColumnSelector)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new ScoreColumnSelectorPipelineStep(output); - } - - [Obsolete] - private class ScoreColumnSelectorPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public ScoreColumnSelectorPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - /// - /// Turn the predictor model into a transform model - /// - [Obsolete] - public sealed partial class Scorer - { - - - /// - /// The predictor model to turn into a transform - /// - [Obsolete] - public Var PredictorModel { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output - { - /// - /// The scored dataset - /// - public Var ScoredData { get; set; } = new Var(); - - /// - /// The scoring transform - /// - public Var ScoringTransform { get; set; } = new Var(); - - } - } - } - - namespace Legacy.Transforms - { - [Obsolete] - public enum UngroupTransformUngroupMode - { - Inner = 0, - Outer = 1, - First = 2 - } - - - /// - /// - [Obsolete] - public sealed partial class Segregator : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Columns to unroll, or 'pivot' - /// - [Obsolete] - public string[] Column { get; set; } - - /// - /// Specifies how to unroll multiple pivot columns of different size. 
- /// - [Obsolete] - public UngroupTransformUngroupMode Mode { get; set; } = UngroupTransformUngroupMode.Inner; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(Segregator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new SegregatorPipelineStep(output); - } - - [Obsolete] - private class SegregatorPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public SegregatorPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - /// - /// - [Obsolete] - public sealed partial class SentimentAnalyzer : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Name of the source column. - /// - [Obsolete] - public string Source { get; set; } - - /// - /// Name of the new column. - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(SentimentAnalyzer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new SentimentAnalyzerPipelineStep(output); - } - - [Obsolete] - private class SentimentAnalyzerPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public SentimentAnalyzerPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - /// - /// - [Obsolete] - public sealed partial class TensorFlowScorer : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// TensorFlow model used by the transform. Please see https://www.tensorflow.org/mobile/prepare_models for more details. 
- /// - [Obsolete] - public string ModelLocation { get; set; } - - /// - /// The names of the model inputs - /// - [Obsolete] - public string[] InputColumns { get; set; } - - /// - /// The name of the outputs - /// - [Obsolete] - public string[] OutputColumns { get; set; } - - /// - /// Training labels. - /// - [Obsolete] - public string LabelColumn { get; set; } - - /// - /// TensorFlow label node. - /// - [Obsolete] - public string TensorFlowLabel { get; set; } - - /// - /// The name of the optimization operation in the TensorFlow graph. - /// - [Obsolete] - public string OptimizationOperation { get; set; } - - /// - /// The name of the operation in the TensorFlow graph to compute training loss (Optional) - /// - [Obsolete] - public string LossOperation { get; set; } - - /// - /// The name of the operation in the TensorFlow graph to compute performance metric during training (Optional) - /// - [Obsolete] - public string MetricOperation { get; set; } - - /// - /// Number of samples to use for mini-batch training. - /// - [Obsolete] - public int BatchSize { get; set; } = 64; - - /// - /// Number of training iterations. - /// - [Obsolete] - public int Epoch { get; set; } = 5; - - /// - /// The name of the operation in the TensorFlow graph which sets optimizer learning rate (Optional). - /// - [Obsolete] - public string LearningRateOperation { get; set; } - - /// - /// Learning rate to use during optimization. - /// - [Obsolete] - public float LearningRate { get; set; } = 0.01f; - - /// - /// Name of the input in TensorFlow graph that specifiy the location for saving/restoring models from disk. - /// - [Obsolete] - public string SaveLocationOperation { get; set; } = "save/Const"; - - /// - /// Name of the input in TensorFlow graph that specifiy the location for saving/restoring models from disk. - /// - [Obsolete] - public string SaveOperation { get; set; } = "save/control_dependency"; - - /// - /// Retrain TensorFlow model. 
- /// - [Obsolete] - public bool ReTrain { get; set; } = false; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(TensorFlowScorer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new TensorFlowScorerPipelineStep(output); - } - - [Obsolete] - private class TensorFlowScorerPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public TensorFlowScorerPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - [Obsolete] - public enum TextFeaturizingEstimatorLanguage - { - English = 1, - French = 2, - German = 3, - Dutch = 4, - Italian = 5, - Spanish = 6, - Japanese = 7 - } - - [Obsolete] - public enum TextNormalizingEstimatorCaseNormalizationMode - { - Lower = 0, - Upper = 1, - None = 2 - } - - [Obsolete] - public enum TextFeaturizingEstimatorTextNormKind - { - None = 0, - L1 = 1, - L2 = 2, - LInf = 3 - } - - - [Obsolete] - public sealed partial class TextFeaturizingEstimatorColumn : ManyToOneColumn, IManyToOneColumn - { - /// - /// Name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Name of the source column - /// - [Obsolete] - public string[] Source { get; set; } - - } - - [Obsolete] - public sealed partial class TermLoaderArguments - { - /// - /// List of terms - /// - [Obsolete] - public string[] Term { get; set; } - - /// - /// How items should be ordered when vectorized. By default, they will be in the order encountered. If by value, items are sorted according to their default comparison, for example, text sorting will be case sensitive (for example, 'A' then 'Z' then 'a'). - /// - [Obsolete] - public ValueToKeyMappingTransformerSortOrder Sort { get; set; } = ValueToKeyMappingTransformerSortOrder.Occurrence; - - /// - /// Drop unknown terms instead of mapping them to NA term. - /// - [Obsolete] - public bool DropUnknowns { get; set; } = false; - - } - - /// - /// - [Obsolete] - public sealed partial class TextFeaturizer : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public TextFeaturizer() - { - } - - public TextFeaturizer(string outputColumn, params string[] inputColumns) - { - AddColumn(outputColumn, inputColumns); - } - - public void AddColumn(string name, params string[] source) - { - Column = ManyToOneColumn.Create(name, source); - } - - - /// - /// New column definition (optional form: name:srcs). - /// - [Obsolete] - public TextFeaturizingEstimatorColumn Column { get; set; } - - /// - /// Dataset language or 'AutoDetect' to detect language per row. 
- /// - [Obsolete] - public TextFeaturizingEstimatorLanguage Language { get; set; } = TextFeaturizingEstimatorLanguage.English; - - /// - /// Use stop remover or not. - /// - [Obsolete] - public bool UsePredefinedStopWordRemover { get; set; } = false; - - /// - /// Casing text using the rules of the invariant culture. - /// - [Obsolete] - public TextNormalizingEstimatorCaseNormalizationMode TextCase { get; set; } = TextNormalizingEstimatorCaseNormalizationMode.Lower; - - /// - /// Whether to keep diacritical marks or remove them. - /// - [Obsolete] - public bool KeepDiacritics { get; set; } = false; - - /// - /// Whether to keep punctuation marks or remove them. - /// - [Obsolete] - public bool KeepPunctuations { get; set; } = true; - - /// - /// Whether to keep numbers or remove them. - /// - [Obsolete] - public bool KeepNumbers { get; set; } = true; - - /// - /// Whether to output the transformed text tokens as an additional column. - /// - [Obsolete] - public bool OutputTokens { get; set; } = false; - - /// - /// A dictionary of whitelisted terms. - /// - [Obsolete] - public TermLoaderArguments Dictionary { get; set; } - - /// - /// Ngram feature extractor to use for words (WordBag/WordHashBag). - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public NgramExtractor WordFeatureExtractor { get; set; } = new NGramNgramExtractor(); - - /// - /// Ngram feature extractor to use for characters (WordBag/WordHashBag). - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public NgramExtractor CharFeatureExtractor { get; set; } = new NGramNgramExtractor() { NgramLength = 3, AllLengths = false }; - - /// - /// Normalize vectors (rows) individually by rescaling them to unit norm. - /// - [Obsolete] - public TextFeaturizingEstimatorTextNormKind VectorNormalizer { get; set; } = TextFeaturizingEstimatorTextNormKind.L2; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(TextFeaturizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new TextFeaturizerPipelineStep(output); - } - - [Obsolete] - private class TextFeaturizerPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public TextFeaturizerPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - /// - /// - [Obsolete] - public sealed partial class TextToKeyConverter : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public TextToKeyConverter() - { - } - - public TextToKeyConverter(params string[] inputColumns) - { - if (inputColumns != null) - { - foreach (string input in inputColumns) - { - AddColumn(input); - } - } - } - - public TextToKeyConverter(params 
(string inputColumn, string outputColumn)[] inputOutputColumns) - { - if (inputOutputColumns != null) - { - foreach (var inputOutput in inputOutputColumns) - { - AddColumn(inputOutput.outputColumn, inputOutput.inputColumn); - } - } - } - - public void AddColumn(string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(inputColumn)); - Column = list.ToArray(); - } - - public void AddColumn(string outputColumn, string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(outputColumn, inputColumn)); - Column = list.ToArray(); - } - - - /// - /// New column definition(s) (optional form: name:src) - /// - [Obsolete] - public ValueToKeyMappingTransformerColumn[] Column { get; set; } - - /// - /// Maximum number of terms to keep per column when auto-training - /// - [Obsolete] - public int MaxNumTerms { get; set; } = 1000000; - - /// - /// List of terms - /// - [Obsolete] - public string[] Term { get; set; } - - /// - /// How items should be ordered when vectorized. By default, they will be in the order encountered. If by value items are sorted according to their default comparison, for example, text sorting will be case sensitive (for example, 'A' then 'Z' then 'a'). - /// - [Obsolete] - public ValueToKeyMappingTransformerSortOrder Sort { get; set; } = ValueToKeyMappingTransformerSortOrder.Occurrence; - - /// - /// Whether key value metadata should be text, regardless of the actual input type - /// - [Obsolete] - public bool TextKeyValues { get; set; } = false; - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(TextToKeyConverter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new TextToKeyConverterPipelineStep(output); - } - - [Obsolete] - private class TextToKeyConverterPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public TextToKeyConverterPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - /// - /// Split the dataset into train and test sets - /// - [Obsolete] - public sealed partial class TrainTestDatasetSplitter - { - - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - /// - /// Fraction of training data - /// - [Obsolete] - public float Fraction { get; set; } = 0.8f; - - /// - /// Stratification column - /// - [Obsolete] - public string StratificationColumn { get; set; } - - - [Obsolete] - public sealed class Output - { - /// - /// Training data - /// - public Var TrainData { get; set; } = new Var(); - - /// - /// Testing data - /// - public Var TestData { get; set; } = new Var(); - - } - } - } - - namespace 
Legacy.Transforms - { - - /// - [Obsolete] - public sealed partial class TreeLeafFeaturizer : Microsoft.ML.EntryPoints.CommonInputs.IFeaturizerInput, Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Output column: The suffix to append to the default column names - /// - [Obsolete] - public string Suffix { get; set; } - - /// - /// If specified, determines the permutation seed for applying this featurizer to a multiclass problem. - /// - [Obsolete] - public int LabelPermutationSeed { get; set; } - - /// - /// Trainer to use - /// - [Obsolete] - public Var PredictorModel { get; set; } = new Var(); - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(TreeLeafFeaturizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new TreeLeafFeaturizerPipelineStep(output); - } - - [Obsolete] - private class TreeLeafFeaturizerPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public TreeLeafFeaturizerPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - /// - /// Combines a TransformModel and a PredictorModel into a single PredictorModel. - /// - [Obsolete] - public sealed partial class TwoHeterogeneousModelCombiner - { - - - /// - /// Transform model - /// - [Obsolete] - public Var TransformModel { get; set; } = new Var(); - - /// - /// Predictor model - /// - [Obsolete] - public Var PredictorModel { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output - { - /// - /// Predictor model - /// - public Var PredictorModel { get; set; } = new Var(); - - } - } - } - - namespace Legacy.Transforms - { - - [Obsolete] - public sealed partial class VectorToImageTransformColumn : OneToOneColumn, IOneToOneColumn - { - /// - /// Whether to use alpha channel - /// - [Obsolete] - public bool? ContainsAlpha { get; set; } - - /// - /// Whether to use red channel - /// - [Obsolete] - public bool? ContainsRed { get; set; } - - /// - /// Whether to use green channel - /// - [Obsolete] - public bool? ContainsGreen { get; set; } - - /// - /// Whether to use blue channel - /// - [Obsolete] - public bool? ContainsBlue { get; set; } - - /// - /// Whether to separate each channel or interleave in ARGB order - /// - [Obsolete] - public bool? InterleaveArgb { get; set; } - - /// - /// Width of the image - /// - [Obsolete] - public int? ImageWidth { get; set; } - - /// - /// Height of the image - /// - [Obsolete] - public int? ImageHeight { get; set; } - - /// - /// Offset (pre-scale) - /// - [Obsolete] - public float? Offset { get; set; } - - /// - /// Scale factor - /// - [Obsolete] - public float? 
Scale { get; set; } - - /// - /// Name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Name of the source column - /// - [Obsolete] - public string Source { get; set; } - - } - - /// - /// Converts vector array into image type. - /// - [Obsolete] - public sealed partial class VectorToImage : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public VectorToImage() - { - } - - public VectorToImage(params string[] inputColumns) - { - if (inputColumns != null) - { - foreach (string input in inputColumns) - { - AddColumn(input); - } - } - } - - public VectorToImage(params (string inputColumn, string outputColumn)[] inputOutputColumns) - { - if (inputOutputColumns != null) - { - foreach (var inputOutput in inputOutputColumns) - { - AddColumn(inputOutput.outputColumn, inputOutput.inputColumn); - } - } - } - - public void AddColumn(string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(inputColumn)); - Column = list.ToArray(); - } - - public void AddColumn(string outputColumn, string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(outputColumn, inputColumn)); - Column = list.ToArray(); - } - - - /// - /// New column definition(s) (optional form: name:src) - /// - [Obsolete] - public VectorToImageTransformColumn[] Column { get; set; } - - /// - /// Whether to use alpha channel - /// - [Obsolete] - public bool ContainsAlpha { get; set; } = false; - - /// - /// Whether to use red channel - /// - [Obsolete] - public bool ContainsRed { get; set; } = true; - - /// - /// Whether to use green channel - /// - [Obsolete] - public bool ContainsGreen { get; set; } = true; - - /// - /// Whether to use blue channel - /// - [Obsolete] - public bool ContainsBlue { get; set; } = true; - - /// - /// Whether to separate each channel or interleave in ARGB order - /// - [Obsolete] - public bool InterleaveArgb { get; set; } = false; - - /// - /// Width of the image - /// - [Obsolete] - public int ImageWidth { get; set; } - - /// - /// Height of the image - /// - [Obsolete] - public int ImageHeight { get; set; } - - /// - /// Offset (pre-scale) - /// - [Obsolete] - public float? Offset { get; set; } - - /// - /// Scale factor - /// - [Obsolete] - public float? 
Scale { get; set; } - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(VectorToImage)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new VectorToImagePipelineStep(output); - } - - [Obsolete] - private class VectorToImagePipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public VectorToImagePipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - [Obsolete] - public enum WordEmbeddingsExtractingTransformerPretrainedModelKind - { - GloVe50D = 0, - GloVe100D = 1, - GloVe200D = 2, - GloVe300D = 3, - GloVeTwitter25D = 4, - GloVeTwitter50D = 5, - GloVeTwitter100D = 6, - GloVeTwitter200D = 7, - FastTextWikipedia300D = 8, - Sswe = 9 - } - - - [Obsolete] - public sealed partial class WordEmbeddingsExtractingTransformerColumn : OneToOneColumn, IOneToOneColumn - { - /// - /// Name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Name of the source column - /// - [Obsolete] - public string Source { get; set; } - - } - - /// - /// - [Obsolete] - public sealed partial class WordEmbeddings : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public WordEmbeddings() - { - } - - public WordEmbeddings(params string[] inputColumns) - { - if (inputColumns != null) - { - foreach (string input in inputColumns) - { - AddColumn(input); - } - } - } - - public WordEmbeddings(params (string inputColumn, string outputColumn)[] inputOutputColumns) - { - if (inputOutputColumns != null) - { - foreach (var inputOutput in inputOutputColumns) - { - AddColumn(inputOutput.outputColumn, inputOutput.inputColumn); - } - } - } - - public void AddColumn(string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(inputColumn)); - Column = list.ToArray(); - } - - public void AddColumn(string outputColumn, string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(outputColumn, inputColumn)); - Column = list.ToArray(); - } - - - /// - /// New column definition(s) (optional form: name:src) - /// - [Obsolete] - public WordEmbeddingsExtractingTransformerColumn[] Column { get; set; } - - /// - /// Pre-trained model used to create the vocabulary - /// - [Obsolete] - public WordEmbeddingsExtractingTransformerPretrainedModelKind? 
ModelKind { get; set; } = WordEmbeddingsExtractingTransformerPretrainedModelKind.Sswe; - - /// - /// Filename for custom word embedding model - /// - [Obsolete] - public string CustomLookupTable { get; set; } - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(WordEmbeddings)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new WordEmbeddingsPipelineStep(output); - } - - [Obsolete] - private class WordEmbeddingsPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public WordEmbeddingsPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - namespace Legacy.Transforms - { - - [Obsolete] - public sealed partial class WordTokenizingTransformerColumn : OneToOneColumn, IOneToOneColumn - { - /// - /// Comma separated set of term separator(s). Commonly: 'space', 'comma', 'semicolon' or other single character. - /// - [Obsolete] - public string TermSeparators { get; set; } - - /// - /// Name of the new column - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Name of the source column - /// - [Obsolete] - public string Source { get; set; } - - } - - /// - /// - [Obsolete] - public sealed partial class WordTokenizer : Microsoft.ML.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - public WordTokenizer() - { - } - - public WordTokenizer(params string[] inputColumns) - { - if (inputColumns != null) - { - foreach (string input in inputColumns) - { - AddColumn(input); - } - } - } - - public WordTokenizer(params (string inputColumn, string outputColumn)[] inputOutputColumns) - { - if (inputOutputColumns != null) - { - foreach (var inputOutput in inputOutputColumns) - { - AddColumn(inputOutput.outputColumn, inputOutput.inputColumn); - } - } - } - - public void AddColumn(string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(inputColumn)); - Column = list.ToArray(); - } - - public void AddColumn(string outputColumn, string inputColumn) - { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(outputColumn, inputColumn)); - Column = list.ToArray(); - } - - - /// - /// New column definition(s) - /// - [Obsolete] - public WordTokenizingTransformerColumn[] Column { get; set; } - - /// - /// Array of single character term separator(s). By default uses space character separator. 
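The legacy `WordEmbeddings` and `WordTokenizer` entries above split text on the configured separator characters and then map each token to a vector from a pretrained model (GloVe, fastText, SSWE) or a custom lookup table. As a rough, self-contained illustration of that data flow only (toy lookup table, plain averaging of token vectors; the shipped transform computes richer per-column statistics, and the class and method names below are made up for the sketch):

```
using System;
using System.Collections.Generic;

// Toy sketch: tokenize on separator characters, then average per-token
// vectors from a small in-memory lookup table. Real pretrained models
// and the actual transform behave differently; this only shows the flow.
class WordEmbeddingSketch
{
    static string[] Tokenize(string text, char[] separators) =>
        text.Split(separators, StringSplitOptions.RemoveEmptyEntries);

    static float[] Embed(string[] tokens, Dictionary<string, float[]> table, int dim)
    {
        var sum = new float[dim];
        int hits = 0;
        foreach (var tok in tokens)
        {
            if (!table.TryGetValue(tok.ToLowerInvariant(), out var vec))
                continue; // out-of-vocabulary tokens are skipped
            for (int i = 0; i < dim; i++) sum[i] += vec[i];
            hits++;
        }
        if (hits > 0)
            for (int i = 0; i < dim; i++) sum[i] /= hits;
        return sum;
    }

    static void Main()
    {
        var table = new Dictionary<string, float[]>
        {
            ["good"] = new[] { 0.9f, 0.1f },
            ["movie"] = new[] { 0.2f, 0.4f },
        };
        var tokens = Tokenize("A good movie", new[] { ' ' });
        Console.WriteLine(string.Join(", ", Embed(tokens, table, 2)));
    }
}
```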
- /// - [Obsolete] - public char[] CharArrayTermSeparators { get; set; } - - /// - /// Input dataset - /// - [Obsolete] - public Var Data { get; set; } = new Var(); - - - [Obsolete] - public sealed class Output : Microsoft.ML.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - [Obsolete] - public Var GetInputData() => Data; - - [Obsolete] - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(WordTokenizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new WordTokenizerPipelineStep(output); - } - - [Obsolete] - private class WordTokenizerPipelineStep : ILearningPipelineDataStep - { - [Obsolete] - public WordTokenizerPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - [Obsolete] - public Var Data { get; } - [Obsolete] - public Var Model { get; } - } - } - } - - [Obsolete] - public abstract class BoosterParameterFunction : ComponentKind {} - - - - /// - /// Dropouts meet Multiple Additive Regresion Trees. See https://arxiv.org/abs/1505.01866 - /// - [Obsolete] - public sealed class DartBoosterParameterFunction : BoosterParameterFunction - { - /// - /// Drop ratio for trees. Range:(0,1). - /// - [TlcModule.Range(Inf = 0d, Max = 1d)] - [Obsolete] - public double DropRate { get; set; } = 0.1d; - - /// - /// Max number of dropped tree in a boosting round. - /// - [TlcModule.Range(Inf = 0, Max = 2147483647)] - [Obsolete] - public int MaxDrop { get; set; } = 1; - - /// - /// Probability for not perform dropping in a boosting round. - /// - [TlcModule.Range(Inf = 0d, Max = 1d)] - [Obsolete] - public double SkipDrop { get; set; } = 0.5d; - - /// - /// True will enable xgboost dart mode. - /// - [Obsolete] - public bool XgboostDartMode { get; set; } = false; - - /// - /// True will enable uniform drop. - /// - [Obsolete] - public bool UniformDrop { get; set; } = false; - - /// - /// Use for binary classification when classes are not balanced. - /// - [Obsolete] - public bool UnbalancedSets { get; set; } = false; - - /// - /// Minimum loss reduction required to make a further partition on a leaf node of the tree. the larger, the more conservative the algorithm will be. - /// - [TlcModule.Range(Min = 0d)] - [Obsolete] - public double MinSplitGain { get; set; } - - /// - /// Maximum depth of a tree. 0 means no limit. However, tree still grows by best-first. - /// - [TlcModule.Range(Min = 0, Max = 2147483647)] - [Obsolete] - public int MaxDepth { get; set; } - - /// - /// Minimum sum of instance weight(hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than min_child_weight, then the building process will give up further partitioning. In linear regression mode, this simply corresponds to minimum number of instances needed to be in each node. The larger, the more conservative the algorithm will be. - /// - [TlcModule.Range(Min = 0d)] - [Obsolete] - public double MinChildWeight { get; set; } = 0.1d; - - /// - /// Subsample frequency. 0 means no subsample. If subsampleFreq > 0, it will use a subset(ratio=subsample) to train. 
And the subset will be updated on every Subsample iteratinos. - /// - [TlcModule.Range(Min = 0, Max = 2147483647)] - [Obsolete] - public int SubsampleFreq { get; set; } - - /// - /// Subsample ratio of the training instance. Setting it to 0.5 means that LightGBM randomly collected half of the data instances to grow trees and this will prevent overfitting. Range: (0,1]. - /// - [TlcModule.Range(Inf = 0d, Max = 1d)] - [Obsolete] - public double Subsample { get; set; } = 1d; - - /// - /// Subsample ratio of columns when constructing each tree. Range: (0,1]. - /// - [TlcModule.Range(Inf = 0d, Max = 1d)] - [Obsolete] - public double FeatureFraction { get; set; } = 1d; - - /// - /// L2 regularization term on weights, increasing this value will make model more conservative. - /// - [TlcModule.Range(Min = 0d)] - [TlcModule.SweepableDiscreteParamAttribute("RegLambda", new object[]{0f, 0.5f, 1f})] - [Obsolete] - public double RegLambda { get; set; } = 0.01d; - - /// - /// L1 regularization term on weights, increase this value will make model more conservative. - /// - [TlcModule.Range(Min = 0d)] - [TlcModule.SweepableDiscreteParamAttribute("RegAlpha", new object[]{0f, 0.5f, 1f})] - [Obsolete] - public double RegAlpha { get; set; } - - /// - /// Control the balance of positive and negative weights, useful for unbalanced classes. A typical value to consider: sum(negative cases) / sum(positive cases). - /// - [Obsolete] - public double ScalePosWeight { get; set; } = 1d; - - [Obsolete] - internal override string ComponentName => "dart"; - } - - - - /// - /// Traditional Gradient Boosting Decision Tree. - /// - [Obsolete] - public sealed class GbdtBoosterParameterFunction : BoosterParameterFunction - { - /// - /// Use for binary classification when classes are not balanced. - /// - [Obsolete] - public bool UnbalancedSets { get; set; } = false; - - /// - /// Minimum loss reduction required to make a further partition on a leaf node of the tree. the larger, the more conservative the algorithm will be. - /// - [TlcModule.Range(Min = 0d)] - [Obsolete] - public double MinSplitGain { get; set; } - - /// - /// Maximum depth of a tree. 0 means no limit. However, tree still grows by best-first. - /// - [TlcModule.Range(Min = 0, Max = 2147483647)] - [Obsolete] - public int MaxDepth { get; set; } - - /// - /// Minimum sum of instance weight(hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than min_child_weight, then the building process will give up further partitioning. In linear regression mode, this simply corresponds to minimum number of instances needed to be in each node. The larger, the more conservative the algorithm will be. - /// - [TlcModule.Range(Min = 0d)] - [Obsolete] - public double MinChildWeight { get; set; } = 0.1d; - - /// - /// Subsample frequency. 0 means no subsample. If subsampleFreq > 0, it will use a subset(ratio=subsample) to train. And the subset will be updated on every Subsample iteratinos. - /// - [TlcModule.Range(Min = 0, Max = 2147483647)] - [Obsolete] - public int SubsampleFreq { get; set; } - - /// - /// Subsample ratio of the training instance. Setting it to 0.5 means that LightGBM randomly collected half of the data instances to grow trees and this will prevent overfitting. Range: (0,1]. - /// - [TlcModule.Range(Inf = 0d, Max = 1d)] - [Obsolete] - public double Subsample { get; set; } = 1d; - - /// - /// Subsample ratio of columns when constructing each tree. Range: (0,1]. 
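The DART booster options above (`DropRate`, `MaxDrop`, `SkipDrop`, `UniformDrop`) control which previously built trees are temporarily dropped in a boosting round: with probability `SkipDrop` no dropping happens, otherwise trees are dropped at rate `DropRate`, capped at `MaxDrop` trees. The sketch below only illustrates that selection step under those assumptions; it is not the LightGBM implementation and omits the rescaling of dropped and new trees.

```
using System;
using System.Collections.Generic;

// Illustrative selection of trees to drop in one DART boosting round.
// dropRate, maxDrop and skipDrop mirror the option names above.
class DartDropSketch
{
    static List<int> ChooseDroppedTrees(int treeCount, double dropRate,
        int maxDrop, double skipDrop, Random rng)
    {
        var dropped = new List<int>();
        if (rng.NextDouble() < skipDrop)
            return dropped;                      // skip dropping this round

        for (int t = 0; t < treeCount && dropped.Count < maxDrop; t++)
            if (rng.NextDouble() < dropRate)
                dropped.Add(t);                  // tree t is muted for this round
        return dropped;
    }

    static void Main()
    {
        var rng = new Random(123);
        var dropped = ChooseDroppedTrees(treeCount: 50, dropRate: 0.1,
            maxDrop: 1, skipDrop: 0.5, rng: rng);
        Console.WriteLine(dropped.Count == 0
            ? "No trees dropped this round."
            : $"Dropped trees: {string.Join(", ", dropped)}");
    }
}
```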
- /// - [TlcModule.Range(Inf = 0d, Max = 1d)] - [Obsolete] - public double FeatureFraction { get; set; } = 1d; - - /// - /// L2 regularization term on weights, increasing this value will make model more conservative. - /// - [TlcModule.Range(Min = 0d)] - [TlcModule.SweepableDiscreteParamAttribute("RegLambda", new object[]{0f, 0.5f, 1f})] - [Obsolete] - public double RegLambda { get; set; } = 0.01d; - - /// - /// L1 regularization term on weights, increase this value will make model more conservative. - /// - [TlcModule.Range(Min = 0d)] - [TlcModule.SweepableDiscreteParamAttribute("RegAlpha", new object[]{0f, 0.5f, 1f})] - [Obsolete] - public double RegAlpha { get; set; } - - /// - /// Control the balance of positive and negative weights, useful for unbalanced classes. A typical value to consider: sum(negative cases) / sum(positive cases). - /// - [Obsolete] - public double ScalePosWeight { get; set; } = 1d; - - [Obsolete] - internal override string ComponentName => "gbdt"; - } - - - - /// - /// Gradient-based One-Side Sampling. - /// - [Obsolete] - public sealed class GossBoosterParameterFunction : BoosterParameterFunction - { - /// - /// Retain ratio for large gradient instances. - /// - [TlcModule.Range(Inf = 0d, Max = 1d)] - [Obsolete] - public double TopRate { get; set; } = 0.2d; - - /// - /// Retain ratio for small gradient instances. - /// - [TlcModule.Range(Inf = 0d, Max = 1d)] - [Obsolete] - public double OtherRate { get; set; } = 0.1d; - - /// - /// Use for binary classification when classes are not balanced. - /// - [Obsolete] - public bool UnbalancedSets { get; set; } = false; - - /// - /// Minimum loss reduction required to make a further partition on a leaf node of the tree. the larger, the more conservative the algorithm will be. - /// - [TlcModule.Range(Min = 0d)] - [Obsolete] - public double MinSplitGain { get; set; } - - /// - /// Maximum depth of a tree. 0 means no limit. However, tree still grows by best-first. - /// - [TlcModule.Range(Min = 0, Max = 2147483647)] - [Obsolete] - public int MaxDepth { get; set; } - - /// - /// Minimum sum of instance weight(hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than min_child_weight, then the building process will give up further partitioning. In linear regression mode, this simply corresponds to minimum number of instances needed to be in each node. The larger, the more conservative the algorithm will be. - /// - [TlcModule.Range(Min = 0d)] - [Obsolete] - public double MinChildWeight { get; set; } = 0.1d; - - /// - /// Subsample frequency. 0 means no subsample. If subsampleFreq > 0, it will use a subset(ratio=subsample) to train. And the subset will be updated on every Subsample iteratinos. - /// - [TlcModule.Range(Min = 0, Max = 2147483647)] - [Obsolete] - public int SubsampleFreq { get; set; } - - /// - /// Subsample ratio of the training instance. Setting it to 0.5 means that LightGBM randomly collected half of the data instances to grow trees and this will prevent overfitting. Range: (0,1]. - /// - [TlcModule.Range(Inf = 0d, Max = 1d)] - [Obsolete] - public double Subsample { get; set; } = 1d; - - /// - /// Subsample ratio of columns when constructing each tree. Range: (0,1]. - /// - [TlcModule.Range(Inf = 0d, Max = 1d)] - [Obsolete] - public double FeatureFraction { get; set; } = 1d; - - /// - /// L2 regularization term on weights, increasing this value will make model more conservative. 
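`TopRate` and `OtherRate` above parameterize Gradient-based One-Side Sampling: keep the `TopRate` fraction of instances with the largest gradient magnitudes, randomly sample an `OtherRate` fraction of the remainder, and up-weight the sampled small-gradient instances by roughly `(1 - TopRate) / OtherRate` so the split statistics stay approximately unbiased. A minimal sketch of that sampling step, with hypothetical helper names, follows; it is not the LightGBM code.

```
using System;
using System.Linq;

// Illustration of Gradient-based One-Side Sampling (GOSS): keep the
// largest gradients, subsample the rest, compensate with a weight.
class GossSketch
{
    static (int[] indices, double[] weights) Sample(double[] gradients,
        double topRate, double otherRate, Random rng)
    {
        int n = gradients.Length;
        int topN = (int)(topRate * n);
        int otherN = (int)(otherRate * n);

        var byMagnitude = Enumerable.Range(0, n)
            .OrderByDescending(i => Math.Abs(gradients[i])).ToArray();

        var top = byMagnitude.Take(topN).ToArray();
        var rest = byMagnitude.Skip(topN).OrderBy(_ => rng.Next()).Take(otherN).ToArray();

        double amplify = (1.0 - topRate) / otherRate;   // weight for small-gradient rows
        var indices = top.Concat(rest).ToArray();
        var weights = top.Select(_ => 1.0).Concat(rest.Select(_ => amplify)).ToArray();
        return (indices, weights);
    }

    static void Main()
    {
        var rng = new Random(1);
        var grads = Enumerable.Range(0, 20).Select(_ => rng.NextDouble() * 2 - 1).ToArray();
        var (idx, w) = Sample(grads, topRate: 0.2, otherRate: 0.1, rng: rng);
        Console.WriteLine($"Kept {idx.Length} of {grads.Length} instances.");
    }
}
```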
- /// - [TlcModule.Range(Min = 0d)] - [TlcModule.SweepableDiscreteParamAttribute("RegLambda", new object[]{0f, 0.5f, 1f})] - [Obsolete] - public double RegLambda { get; set; } = 0.01d; - - /// - /// L1 regularization term on weights, increase this value will make model more conservative. - /// - [TlcModule.Range(Min = 0d)] - [TlcModule.SweepableDiscreteParamAttribute("RegAlpha", new object[]{0f, 0.5f, 1f})] - [Obsolete] - public double RegAlpha { get; set; } - - /// - /// Control the balance of positive and negative weights, useful for unbalanced classes. A typical value to consider: sum(negative cases) / sum(positive cases). - /// - [Obsolete] - public double ScalePosWeight { get; set; } = 1d; - - [Obsolete] - internal override string ComponentName => "goss"; - } - - [Obsolete] - public abstract class CalibratorTrainer : ComponentKind {} - - - - [Obsolete] - public sealed class FixedPlattCalibratorCalibratorTrainer : CalibratorTrainer - { - /// - /// The slope parameter of f(x) = 1 / (1 + exp(-slope * x + offset) - /// - [Obsolete] - public double Slope { get; set; } = 1d; - - /// - /// The offset parameter of f(x) = 1 / (1 + exp(-slope * x + offset) - /// - [Obsolete] - public double Offset { get; set; } - - [Obsolete] - internal override string ComponentName => "FixedPlattCalibrator"; - } - - - - [Obsolete] - public sealed class NaiveCalibratorCalibratorTrainer : CalibratorTrainer - { - [Obsolete] - internal override string ComponentName => "NaiveCalibrator"; - } - - - - [Obsolete] - public sealed class PavCalibratorCalibratorTrainer : CalibratorTrainer - { - [Obsolete] - internal override string ComponentName => "PavCalibrator"; - } - - - - /// - /// Platt calibration. - /// - [Obsolete] - public sealed class PlattCalibratorCalibratorTrainer : CalibratorTrainer - { - [Obsolete] - internal override string ComponentName => "PlattCalibrator"; - } - - [Obsolete] - public abstract class ClassificationLossFunction : ComponentKind {} - - - - /// - /// Exponential loss. - /// - [Obsolete] - public sealed class ExpLossClassificationLossFunction : ClassificationLossFunction - { - /// - /// Beta (dilation) - /// - [Obsolete] - public float Beta { get; set; } = 1f; - - [Obsolete] - internal override string ComponentName => "ExpLoss"; - } - - - - /// - /// Hinge loss. - /// - [Obsolete] - public sealed class HingeLossClassificationLossFunction : ClassificationLossFunction - { - /// - /// Margin value - /// - [Obsolete] - public float Margin { get; set; } = 1f; - - [Obsolete] - internal override string ComponentName => "HingeLoss"; - } - - - - /// - /// Log loss. - /// - [Obsolete] - public sealed class LogLossClassificationLossFunction : ClassificationLossFunction - { - [Obsolete] - internal override string ComponentName => "LogLoss"; - } - - - - /// - /// Smoothed Hinge loss. - /// - [Obsolete] - public sealed class SmoothedHingeLossClassificationLossFunction : ClassificationLossFunction - { - /// - /// Smoothing constant - /// - [Obsolete] - public float SmoothingConst { get; set; } = 1f; - - [Obsolete] - internal override string ComponentName => "SmoothedHingeLoss"; - } - - [Obsolete] - public abstract class EarlyStoppingCriterion : ComponentKind {} - - - - /// - /// Stop in case of loss of generality. - /// - [Obsolete] - public sealed class GLEarlyStoppingCriterion : EarlyStoppingCriterion - { - /// - /// Threshold in range [0,1]. 
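The calibrator and loss components above reduce to short formulas. Platt calibration maps a raw score s to `1 / (1 + exp(-slope * s + offset))` as stated in the summary, and for a label y in {-1, +1} the standard forms of the listed losses are exp loss `exp(-beta * y * s)`, hinge loss `max(0, margin - y * s)`, and log loss `ln(1 + exp(-y * s))`. The sketch below just writes those formulas out; the names are illustrative and it is not the ML.NET implementation.

```
using System;

// Reference formulas matching the calibrator/loss options listed above,
// for a raw score s and a binary label y in {-1, +1}.
static class CalibrationAndLossSketch
{
    // FixedPlattCalibrator: f(s) = 1 / (1 + exp(-slope * s + offset))
    public static double PlattCalibrate(double s, double slope = 1.0, double offset = 0.0)
        => 1.0 / (1.0 + Math.Exp(-slope * s + offset));

    // ExpLoss with dilation beta: exp(-beta * y * s)
    public static double ExpLoss(double y, double s, double beta = 1.0)
        => Math.Exp(-beta * y * s);

    // HingeLoss with a configurable margin: max(0, margin - y * s)
    public static double HingeLoss(double y, double s, double margin = 1.0)
        => Math.Max(0.0, margin - y * s);

    // LogLoss: ln(1 + exp(-y * s))
    public static double LogLoss(double y, double s)
        => Math.Log(1.0 + Math.Exp(-y * s));

    public static void Main()
    {
        double score = 2.5;
        Console.WriteLine($"Calibrated probability: {PlattCalibrate(score):F3}");
        Console.WriteLine($"Hinge loss (y=+1): {HingeLoss(+1, score):F3}");
        Console.WriteLine($"Log loss   (y=-1): {LogLoss(-1, score):F3}");
    }
}
```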
- /// - [TlcModule.Range(Min = 0f, Max = 1f)] - [Obsolete] - public float Threshold { get; set; } = 0.01f; - - [Obsolete] - internal override string ComponentName => "GL"; - } - - - - /// - /// Stops in case of low progress. - /// - [Obsolete] - public sealed class LPEarlyStoppingCriterion : EarlyStoppingCriterion - { - /// - /// Threshold in range [0,1]. - /// - [TlcModule.Range(Min = 0f, Max = 1f)] - [Obsolete] - public float Threshold { get; set; } = 0.01f; - - /// - /// The window size. - /// - [TlcModule.Range(Inf = 0)] - [Obsolete] - public int WindowSize { get; set; } = 5; - - [Obsolete] - internal override string ComponentName => "LP"; - } - - - - /// - /// Stops in case of generality to progress ration exceeds threshold. - /// - [Obsolete] - public sealed class PQEarlyStoppingCriterion : EarlyStoppingCriterion - { - /// - /// Threshold in range [0,1]. - /// - [TlcModule.Range(Min = 0f, Max = 1f)] - [Obsolete] - public float Threshold { get; set; } = 0.01f; - - /// - /// The window size. - /// - [TlcModule.Range(Inf = 0)] - [Obsolete] - public int WindowSize { get; set; } = 5; - - [Obsolete] - internal override string ComponentName => "PQ"; - } - - - - /// - /// Stop if validation score exceeds threshold value. - /// - [Obsolete] - public sealed class TREarlyStoppingCriterion : EarlyStoppingCriterion - { - /// - /// Tolerance threshold. (Non negative value) - /// - [TlcModule.Range(Min = 0f)] - [Obsolete] - public float Threshold { get; set; } = 0.01f; - - [Obsolete] - internal override string ComponentName => "TR"; - } - - - - /// - /// Stops in case of consecutive loss in generality. - /// - [Obsolete] - public sealed class UPEarlyStoppingCriterion : EarlyStoppingCriterion - { - /// - /// The window size. - /// - [TlcModule.Range(Inf = 0)] - [Obsolete] - public int WindowSize { get; set; } = 5; - - [Obsolete] - internal override string ComponentName => "UP"; - } - - [Obsolete] - public abstract class EnsembleBinaryDiversityMeasure : ComponentKind {} - - - - [Obsolete] - public sealed class DisagreementDiversityMeasureEnsembleBinaryDiversityMeasure : EnsembleBinaryDiversityMeasure - { - [Obsolete] - internal override string ComponentName => "DisagreementDiversityMeasure"; - } - - [Obsolete] - public abstract class EnsembleBinaryOutputCombiner : ComponentKind {} - - - - [Obsolete] - public sealed class AverageEnsembleBinaryOutputCombiner : EnsembleBinaryOutputCombiner - { - [Obsolete] - internal override string ComponentName => "Average"; - } - - - - [Obsolete] - public sealed class MedianEnsembleBinaryOutputCombiner : EnsembleBinaryOutputCombiner - { - [Obsolete] - internal override string ComponentName => "Median"; - } - - - - [Obsolete] - public sealed class StackingEnsembleBinaryOutputCombiner : EnsembleBinaryOutputCombiner - { - /// - /// The proportion of instances to be selected to test the individual base learner. 
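The early stopping criteria above all watch recent validation scores and stop when a threshold or window condition is met; the simplest, `UP`, stops after the validation metric has degraded for `WindowSize` consecutive checks. The sketch below illustrates that window rule only, assuming a lower-is-better validation loss; the other criteria (GL, LP, PQ, TR) use related but different ratios, and this is not the ML.NET implementation.

```
using System;
using System.Collections.Generic;

// Sketch of a "UP"-style rule: stop when the validation loss has gotten
// worse for windowSize consecutive evaluations (lower loss is better).
class EarlyStoppingSketch
{
    static bool ShouldStop(IReadOnlyList<double> validationLosses, int windowSize = 5)
    {
        if (validationLosses.Count <= windowSize)
            return false;
        for (int i = validationLosses.Count - windowSize; i < validationLosses.Count; i++)
            if (validationLosses[i] <= validationLosses[i - 1])
                return false;   // at least one non-degrading step in the window
        return true;            // loss increased windowSize times in a row
    }

    static void Main()
    {
        var losses = new List<double> { 0.9, 0.7, 0.6, 0.61, 0.63, 0.66, 0.70, 0.75 };
        Console.WriteLine(ShouldStop(losses) ? "Stop training." : "Keep training.");
    }
}
```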
If it is 0, it uses training set - /// - [Obsolete] - public float ValidationDatasetProportion { get; set; } = 0.3f; - - [Obsolete] - internal override string ComponentName => "Stacking"; - } - - - - [Obsolete] - public sealed class VotingEnsembleBinaryOutputCombiner : EnsembleBinaryOutputCombiner - { - [Obsolete] - internal override string ComponentName => "Voting"; - } - - [Obsolete] - public enum WeightageKind - { - Accuracy = 0, - Auc = 1, - PosPrecision = 2, - PosRecall = 3, - NegPrecision = 4, - NegRecall = 5 - } - - - - [Obsolete] - public sealed class WeightedAverageEnsembleBinaryOutputCombiner : EnsembleBinaryOutputCombiner - { - /// - /// The metric type to be used to find the weights for each model - /// - [Obsolete] - public WeightageKind WeightageName { get; set; } = WeightageKind.Auc; - - [Obsolete] - internal override string ComponentName => "WeightedAverage"; - } - - [Obsolete] - public abstract class EnsembleBinarySubModelSelector : ComponentKind {} - - - - [Obsolete] - public sealed class AllSelectorEnsembleBinarySubModelSelector : EnsembleBinarySubModelSelector - { - [Obsolete] - internal override string ComponentName => "AllSelector"; - } - - - - [Obsolete] - public sealed class BestDiverseSelectorEnsembleBinarySubModelSelector : EnsembleBinarySubModelSelector - { - /// - /// The metric type to be used to find the diversity among base learners - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public EnsembleBinaryDiversityMeasure DiversityMetricType { get; set; } = new DisagreementDiversityMeasureEnsembleBinaryDiversityMeasure(); - - /// - /// The proportion of best base learners to be selected. The range is 0.0-1.0 - /// - [Obsolete] - public float LearnersSelectionProportion { get; set; } = 0.5f; - - /// - /// The proportion of instances to be selected to test the individual base learner. If it is 0, it uses training set - /// - [Obsolete] - public float ValidationDatasetProportion { get; set; } = 0.3f; - - [Obsolete] - internal override string ComponentName => "BestDiverseSelector"; - } - - [Obsolete] - public enum BinaryClassifierEvaluatorMetrics - { - Accuracy = 0, - PosPrecName = 1, - PosRecallName = 2, - NegPrecName = 3, - NegRecallName = 4, - Auc = 5, - LogLoss = 6, - LogLossReduction = 7, - F1 = 8, - AuPrc = 9 - } - - - - [Obsolete] - public sealed class BestPerformanceSelectorEnsembleBinarySubModelSelector : EnsembleBinarySubModelSelector - { - /// - /// The metric type to be used to find the best performance - /// - [Obsolete] - public BinaryClassifierEvaluatorMetrics MetricName { get; set; } = BinaryClassifierEvaluatorMetrics.Auc; - - /// - /// The proportion of best base learners to be selected. The range is 0.0-1.0 - /// - [Obsolete] - public float LearnersSelectionProportion { get; set; } = 0.5f; - - /// - /// The proportion of instances to be selected to test the individual base learner. If it is 0, it uses training set - /// - [Obsolete] - public float ValidationDatasetProportion { get; set; } = 0.3f; - - [Obsolete] - internal override string ComponentName => "BestPerformanceSelector"; - } - - [Obsolete] - public abstract class EnsembleFeatureSelector : ComponentKind {} - - - - [Obsolete] - public sealed class AllFeatureSelectorEnsembleFeatureSelector : EnsembleFeatureSelector - { - [Obsolete] - internal override string ComponentName => "AllFeatureSelector"; - } - - - - [Obsolete] - public sealed class RandomFeatureSelectorEnsembleFeatureSelector : EnsembleFeatureSelector - { - /// - /// The proportion of features to be selected. 
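The binary-ensemble output combiners above merge base-model scores per example: plain average, median, or a weighted average whose per-model weights come from a validation metric such as AUC (`WeightageKind.Auc` by default). A small self-contained sketch of those three combinations, with illustrative names:

```
using System;
using System.Linq;

// Combine per-example scores from several base models, as the ensemble
// output combiners above do: average, median, metric-weighted average.
class EnsembleCombinerSketch
{
    static double Average(double[] scores) => scores.Average();

    static double Median(double[] scores)
    {
        var s = scores.OrderBy(x => x).ToArray();
        int m = s.Length / 2;
        return s.Length % 2 == 1 ? s[m] : (s[m - 1] + s[m]) / 2.0;
    }

    // modelWeights would typically be a validation metric per model (e.g. AUC).
    static double WeightedAverage(double[] scores, double[] modelWeights)
    {
        double total = modelWeights.Sum();
        return Enumerable.Range(0, scores.Length)
            .Sum(i => scores[i] * modelWeights[i]) / total;
    }

    static void Main()
    {
        double[] scores = { 0.9, 0.4, 0.7 };        // one example, three base models
        double[] aucs = { 0.85, 0.60, 0.75 };       // per-model validation AUC
        Console.WriteLine($"Average:  {Average(scores):F3}");
        Console.WriteLine($"Median:   {Median(scores):F3}");
        Console.WriteLine($"Weighted: {WeightedAverage(scores, aucs):F3}");
    }
}
```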
The range is 0.0-1.0 - /// - [Obsolete] - public float FeaturesSelectionProportion { get; set; } = 0.8f; - - [Obsolete] - internal override string ComponentName => "RandomFeatureSelector"; - } - - [Obsolete] - public abstract class EnsembleMulticlassDiversityMeasure : ComponentKind {} - - - - [Obsolete] - public sealed class MultiDisagreementDiversityMeasureEnsembleMulticlassDiversityMeasure : EnsembleMulticlassDiversityMeasure - { - [Obsolete] - internal override string ComponentName => "MultiDisagreementDiversityMeasure"; - } - - [Obsolete] - public abstract class EnsembleMulticlassOutputCombiner : ComponentKind {} - - - - [Obsolete] - public sealed class MultiAverageEnsembleMulticlassOutputCombiner : EnsembleMulticlassOutputCombiner - { - /// - /// Whether to normalize the output of base models before combining them - /// - [Obsolete] - public bool Normalize { get; set; } = true; - - [Obsolete] - internal override string ComponentName => "MultiAverage"; - } - - - - [Obsolete] - public sealed class MultiMedianEnsembleMulticlassOutputCombiner : EnsembleMulticlassOutputCombiner - { - /// - /// Whether to normalize the output of base models before combining them - /// - [Obsolete] - public bool Normalize { get; set; } = true; - - [Obsolete] - internal override string ComponentName => "MultiMedian"; - } - - - - [Obsolete] - public sealed class MultiStackingEnsembleMulticlassOutputCombiner : EnsembleMulticlassOutputCombiner - { - /// - /// The proportion of instances to be selected to test the individual base learner. If it is 0, it uses training set - /// - [Obsolete] - public float ValidationDatasetProportion { get; set; } = 0.3f; - - [Obsolete] - internal override string ComponentName => "MultiStacking"; - } - - - - [Obsolete] - public sealed class MultiVotingEnsembleMulticlassOutputCombiner : EnsembleMulticlassOutputCombiner - { - [Obsolete] - internal override string ComponentName => "MultiVoting"; - } - - [Obsolete] - public enum MultiWeightageKind - { - AccuracyMicroAvg = 0, - AccuracyMacroAvg = 1 - } - - - - [Obsolete] - public sealed class MultiWeightedAverageEnsembleMulticlassOutputCombiner : EnsembleMulticlassOutputCombiner - { - /// - /// The metric type to be used to find the weights for each model - /// - [Obsolete] - public MultiWeightageKind WeightageName { get; set; } = MultiWeightageKind.AccuracyMicroAvg; - - /// - /// Whether to normalize the output of base models before combining them - /// - [Obsolete] - public bool Normalize { get; set; } = true; - - [Obsolete] - internal override string ComponentName => "MultiWeightedAverage"; - } - - [Obsolete] - public abstract class EnsembleMulticlassSubModelSelector : ComponentKind {} - - - - [Obsolete] - public sealed class AllSelectorMultiClassEnsembleMulticlassSubModelSelector : EnsembleMulticlassSubModelSelector - { - [Obsolete] - internal override string ComponentName => "AllSelectorMultiClass"; - } - - - - [Obsolete] - public sealed class BestDiverseSelectorMultiClassEnsembleMulticlassSubModelSelector : EnsembleMulticlassSubModelSelector - { - /// - /// The metric type to be used to find the diversity among base learners - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public EnsembleMulticlassDiversityMeasure DiversityMetricType { get; set; } = new MultiDisagreementDiversityMeasureEnsembleMulticlassDiversityMeasure(); - - /// - /// The proportion of best base learners to be selected. 
The range is 0.0-1.0 - /// - [Obsolete] - public float LearnersSelectionProportion { get; set; } = 0.5f; - - /// - /// The proportion of instances to be selected to test the individual base learner. If it is 0, it uses training set - /// - [Obsolete] - public float ValidationDatasetProportion { get; set; } = 0.3f; - - [Obsolete] - internal override string ComponentName => "BestDiverseSelectorMultiClass"; - } - - [Obsolete] - public enum MultiClassClassifierEvaluatorMetrics - { - AccuracyMicro = 0, - AccuracyMacro = 1, - LogLoss = 2, - LogLossReduction = 3 - } - - - - [Obsolete] - public sealed class BestPerformanceSelectorMultiClassEnsembleMulticlassSubModelSelector : EnsembleMulticlassSubModelSelector - { - /// - /// The metric type to be used to find the best performance - /// - [Obsolete] - public MultiClassClassifierEvaluatorMetrics MetricName { get; set; } = MultiClassClassifierEvaluatorMetrics.AccuracyMicro; - - /// - /// The proportion of best base learners to be selected. The range is 0.0-1.0 - /// - [Obsolete] - public float LearnersSelectionProportion { get; set; } = 0.5f; - - /// - /// The proportion of instances to be selected to test the individual base learner. If it is 0, it uses training set - /// - [Obsolete] - public float ValidationDatasetProportion { get; set; } = 0.3f; - - [Obsolete] - internal override string ComponentName => "BestPerformanceSelectorMultiClass"; - } - - [Obsolete] - public abstract class EnsembleRegressionDiversityMeasure : ComponentKind {} - - - - [Obsolete] - public sealed class RegressionDisagreementDiversityMeasureEnsembleRegressionDiversityMeasure : EnsembleRegressionDiversityMeasure - { - [Obsolete] - internal override string ComponentName => "RegressionDisagreementDiversityMeasure"; - } - - [Obsolete] - public abstract class EnsembleRegressionOutputCombiner : ComponentKind {} - - - - [Obsolete] - public sealed class AverageEnsembleRegressionOutputCombiner : EnsembleRegressionOutputCombiner - { - [Obsolete] - internal override string ComponentName => "Average"; - } - - - - [Obsolete] - public sealed class MedianEnsembleRegressionOutputCombiner : EnsembleRegressionOutputCombiner - { - [Obsolete] - internal override string ComponentName => "Median"; - } - - - - [Obsolete] - public sealed class RegressionStackingEnsembleRegressionOutputCombiner : EnsembleRegressionOutputCombiner - { - /// - /// The proportion of instances to be selected to test the individual base learner. If it is 0, it uses training set - /// - [Obsolete] - public float ValidationDatasetProportion { get; set; } = 0.3f; - - [Obsolete] - internal override string ComponentName => "RegressionStacking"; - } - - [Obsolete] - public abstract class EnsembleRegressionSubModelSelector : ComponentKind {} - - - - [Obsolete] - public sealed class AllSelectorEnsembleRegressionSubModelSelector : EnsembleRegressionSubModelSelector - { - [Obsolete] - internal override string ComponentName => "AllSelector"; - } - - - - [Obsolete] - public sealed class BestDiverseSelectorRegressionEnsembleRegressionSubModelSelector : EnsembleRegressionSubModelSelector - { - /// - /// The metric type to be used to find the diversity among base learners - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public EnsembleRegressionDiversityMeasure DiversityMetricType { get; set; } = new RegressionDisagreementDiversityMeasureEnsembleRegressionDiversityMeasure(); - - /// - /// The proportion of best base learners to be selected. 
The range is 0.0-1.0 - /// - [Obsolete] - public float LearnersSelectionProportion { get; set; } = 0.5f; - - /// - /// The proportion of instances to be selected to test the individual base learner. If it is 0, it uses training set - /// - [Obsolete] - public float ValidationDatasetProportion { get; set; } = 0.3f; - - [Obsolete] - internal override string ComponentName => "BestDiverseSelectorRegression"; - } - - [Obsolete] - public enum RegressionEvaluatorMetrics - { - L1 = 0, - L2 = 1, - Rms = 2, - Loss = 3, - RSquared = 4 - } - - - - [Obsolete] - public sealed class BestPerformanceRegressionSelectorEnsembleRegressionSubModelSelector : EnsembleRegressionSubModelSelector - { - /// - /// The metric type to be used to find the best performance - /// - [Obsolete] - public RegressionEvaluatorMetrics MetricName { get; set; } = RegressionEvaluatorMetrics.L1; - - /// - /// The proportion of best base learners to be selected. The range is 0.0-1.0 - /// - [Obsolete] - public float LearnersSelectionProportion { get; set; } = 0.5f; - - /// - /// The proportion of instances to be selected to test the individual base learner. If it is 0, it uses training set - /// - [Obsolete] - public float ValidationDatasetProportion { get; set; } = 0.3f; - - [Obsolete] - internal override string ComponentName => "BestPerformanceRegressionSelector"; - } - - [Obsolete] - public abstract class EnsembleSubsetSelector : ComponentKind {} - - - - [Obsolete] - public sealed class AllInstanceSelectorEnsembleSubsetSelector : EnsembleSubsetSelector - { - /// - /// The Feature selector - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public EnsembleFeatureSelector FeatureSelector { get; set; } = new AllFeatureSelectorEnsembleFeatureSelector(); - - [Obsolete] - internal override string ComponentName => "AllInstanceSelector"; - } - - - - [Obsolete] - public sealed class BootstrapSelectorEnsembleSubsetSelector : EnsembleSubsetSelector - { - /// - /// The Feature selector - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public EnsembleFeatureSelector FeatureSelector { get; set; } = new AllFeatureSelectorEnsembleFeatureSelector(); - - [Obsolete] - internal override string ComponentName => "BootstrapSelector"; - } - - - - [Obsolete] - public sealed class RandomPartitionSelectorEnsembleSubsetSelector : EnsembleSubsetSelector - { - /// - /// The Feature selector - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public EnsembleFeatureSelector FeatureSelector { get; set; } = new AllFeatureSelectorEnsembleFeatureSelector(); - - [Obsolete] - internal override string ComponentName => "RandomPartitionSelector"; - } - - [Obsolete] - public abstract class FastTreeTrainer : ComponentKind {} - - - - /// - /// Uses a logit-boost boosted tree learner to perform binary classification. - /// - [Obsolete] - public sealed class FastTreeBinaryClassificationFastTreeTrainer : FastTreeTrainer - { - /// - /// Should we use derivatives optimized for unbalanced sets - /// - [Obsolete] - public bool UnbalancedSets { get; set; } = false; - - /// - /// Use best regression step trees? 
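The ensemble subset and feature selectors above decide what each base learner trains on: all rows, a bootstrap sample, or a random partition of the rows, combined with all features or a random proportion of them (`FeaturesSelectionProportion`, 0.8 by default). A rough sketch of bootstrap row sampling and random feature selection, with made-up helper names; it is only an illustration of the idea:

```
using System;
using System.Linq;

// Sketch of how an ensemble subset selector might draw training rows
// (bootstrap sampling) and a random feature selector might pick a
// proportion of the feature columns.
class SubsetSelectionSketch
{
    static int[] BootstrapRows(int rowCount, Random rng) =>
        Enumerable.Range(0, rowCount).Select(_ => rng.Next(rowCount)).ToArray();

    static int[] RandomFeatures(int featureCount, double proportion, Random rng) =>
        Enumerable.Range(0, featureCount)
            .OrderBy(_ => rng.Next())
            .Take(Math.Max(1, (int)(proportion * featureCount)))
            .OrderBy(i => i)
            .ToArray();

    static void Main()
    {
        var rng = new Random(42);
        Console.WriteLine("Rows:     " + string.Join(", ", BootstrapRows(10, rng)));
        Console.WriteLine("Features: " + string.Join(", ", RandomFeatures(8, 0.8, rng)));
    }
}
```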
- /// - [Obsolete] - public bool BestStepRankingRegressionTrees { get; set; } = false; - - /// - /// Should we use line search for a step size - /// - [Obsolete] - public bool UseLineSearch { get; set; } = false; - - /// - /// Number of post-bracket line search steps - /// - [Obsolete] - public int NumPostBracketSteps { get; set; } - - /// - /// Minimum line search step size - /// - [Obsolete] - public double MinStepSize { get; set; } - - /// - /// Optimization algorithm to be used (GradientDescent, AcceleratedGradientDescent) - /// - [Obsolete] - public Microsoft.ML.Legacy.Trainers.BoostedTreeArgsOptimizationAlgorithmType OptimizationAlgorithm { get; set; } = Microsoft.ML.Legacy.Trainers.BoostedTreeArgsOptimizationAlgorithmType.GradientDescent; - - /// - /// Early stopping rule. (Validation set (/valid) is required.) - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public EarlyStoppingCriterion EarlyStoppingRule { get; set; } - - /// - /// Early stopping metrics. (For regression, 1: L1, 2:L2; for ranking, 1:NDCG@1, 3:NDCG@3) - /// - [Obsolete] - public int EarlyStoppingMetrics { get; set; } - - /// - /// Enable post-training pruning to avoid overfitting. (a validation set is required) - /// - [Obsolete] - public bool EnablePruning { get; set; } = false; - - /// - /// Use window and tolerance for pruning - /// - [Obsolete] - public bool UseTolerantPruning { get; set; } = false; - - /// - /// The tolerance threshold for pruning - /// - [Obsolete] - public double PruningThreshold { get; set; } = 0.004d; - - /// - /// The moving window size for pruning - /// - [Obsolete] - public int PruningWindowSize { get; set; } = 5; - - /// - /// The learning rate - /// - [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale:true)] - [Obsolete] - public double LearningRates { get; set; } = 0.2d; - - /// - /// Shrinkage - /// - [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale:true)] - [Obsolete] - public double Shrinkage { get; set; } = 1d; - - /// - /// Dropout rate for tree regularization - /// - [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[]{0f, 1E-09f, 0.05f, 0.1f, 0.2f})] - [Obsolete] - public double DropoutRate { get; set; } - - /// - /// Sample each query 1 in k times in the GetDerivatives function - /// - [Obsolete] - public int GetDerivativesSampleRate { get; set; } = 1; - - /// - /// Write the last ensemble instead of the one determined by early stopping - /// - [Obsolete] - public bool WriteLastEnsemble { get; set; } = false; - - /// - /// Upper bound on absolute value of single tree output - /// - [Obsolete] - public double MaxTreeOutput { get; set; } = 100d; - - /// - /// Training starts from random ordering (determined by /r1) - /// - [Obsolete] - public bool RandomStart { get; set; } = false; - - /// - /// Filter zero lambdas during training - /// - [Obsolete] - public bool FilterZeroLambdas { get; set; } = false; - - /// - /// Freeform defining the scores that should be used as the baseline ranker - /// - [Obsolete] - public string BaselineScoresFormula { get; set; } - - /// - /// Baseline alpha for tradeoffs of risk (0 is normal training) - /// - [Obsolete] - public string BaselineAlphaRisk { get; set; } - - /// - /// The discount freeform which specifies the per position discounts of documents in a query (uses a single variable P for position where P=0 is first position) - /// - [Obsolete] - public string PositionDiscountFreeform { get; set; } - - /// - /// Allows to choose Parallel FastTree 
Learning Algorithm - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public ParallelTraining ParallelTrainer { get; set; } = new SingleParallelTraining(); - - /// - /// The number of threads to use - /// - [Obsolete] - public int? NumThreads { get; set; } - - /// - /// The seed of the random number generator - /// - [Obsolete] - public int RngSeed { get; set; } = 123; - - /// - /// The seed of the active feature selection - /// - [Obsolete] - public int FeatureSelectSeed { get; set; } = 123; - - /// - /// The entropy (regularization) coefficient between 0 and 1 - /// - [Obsolete] - public double EntropyCoefficient { get; set; } - - /// - /// The number of histograms in the pool (between 2 and numLeaves) - /// - [Obsolete] - public int HistogramPoolSize { get; set; } = -1; - - /// - /// Whether to utilize the disk or the data's native transposition facilities (where applicable) when performing the transpose - /// - [Obsolete] - public bool? DiskTranspose { get; set; } - - /// - /// Whether to collectivize features during dataset preparation to speed up training - /// - [Obsolete] - public bool FeatureFlocks { get; set; } = true; - - /// - /// Whether to do split based on multiple categorical feature values. - /// - [Obsolete] - public bool CategoricalSplit { get; set; } = false; - - /// - /// Maximum categorical split groups to consider when splitting on a categorical feature. Split groups are a collection of split points. This is used to reduce overfitting when there many categorical features. - /// - [Obsolete] - public int MaxCategoricalGroupsPerNode { get; set; } = 64; - - /// - /// Maximum categorical split points to consider when splitting on a categorical feature. - /// - [Obsolete] - public int MaxCategoricalSplitPoints { get; set; } = 64; - - /// - /// Minimum categorical docs percentage in a bin to consider for a split. - /// - [Obsolete] - public double MinDocsPercentageForCategoricalSplit { get; set; } = 0.001d; - - /// - /// Minimum categorical doc count in a bin to consider for a split. - /// - [Obsolete] - public int MinDocsForCategoricalSplit { get; set; } = 100; - - /// - /// Bias for calculating gradient for each feature bin for a categorical feature. - /// - [Obsolete] - public double Bias { get; set; } - - /// - /// Bundle low population bins. Bundle.None(0): no bundling, Bundle.AggregateLowPopulation(1): Bundle low population, Bundle.Adjacent(2): Neighbor low population bundle. - /// - [Obsolete] - public Microsoft.ML.Legacy.Trainers.Bundle Bundling { get; set; } = Microsoft.ML.Legacy.Trainers.Bundle.None; - - /// - /// Maximum number of distinct values (bins) per feature - /// - [Obsolete] - public int MaxBins { get; set; } = 255; - - /// - /// Sparsity level needed to use sparse feature representation - /// - [Obsolete] - public double SparsifyThreshold { get; set; } = 0.7d; - - /// - /// The feature first use penalty coefficient - /// - [Obsolete] - public double FeatureFirstUsePenalty { get; set; } - - /// - /// The feature re-use penalty (regularization) coefficient - /// - [Obsolete] - public double FeatureReusePenalty { get; set; } - - /// - /// Tree fitting gain confidence requirement (should be in the range [0,1) ). 
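Several of the boosted-tree options above (`LearningRates`, `MaxTreeOutput`, `NumTrees`) only shape how each fitted tree's output is folded into the running score. Under the usual gradient-boosting formulation, the per-example update looks roughly like the sketch below; this is an illustration of that formulation, not the FastTree internals:

```
using System;

// Sketch of the additive model update in gradient boosting:
// score <- score + learningRate * clamp(treeOutput, +/- maxTreeOutput).
class BoostingUpdateSketch
{
    static double[] ApplyTree(double[] scores, double[] treeOutputs,
        double learningRate = 0.2, double maxTreeOutput = 100.0)
    {
        var updated = new double[scores.Length];
        for (int i = 0; i < scores.Length; i++)
        {
            double clipped = Math.Max(-maxTreeOutput, Math.Min(maxTreeOutput, treeOutputs[i]));
            updated[i] = scores[i] + learningRate * clipped;
        }
        return updated;
    }

    static void Main()
    {
        double[] scores = { 0.0, 0.0, 0.0 };
        double[] tree = { 1.5, -0.5, 250.0 };   // last output gets clipped to 100
        Console.WriteLine(string.Join(", ", ApplyTree(scores, tree)));
    }
}
```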
- /// - [Obsolete] - public double GainConfidenceLevel { get; set; } - - /// - /// The temperature of the randomized softmax distribution for choosing the feature - /// - [Obsolete] - public double SoftmaxTemperature { get; set; } - - /// - /// Print execution time breakdown to stdout - /// - [Obsolete] - public bool ExecutionTimes { get; set; } = false; - - /// - /// The max number of leaves in each regression tree - /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] - [Obsolete] - public int NumLeaves { get; set; } = 20; - - /// - /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data - /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[]{1, 10, 50})] - [Obsolete] - public int MinDocumentsInLeafs { get; set; } = 10; - - /// - /// Total number of decision trees to create in the ensemble - /// - [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[]{20, 100, 500})] - [Obsolete] - public int NumTrees { get; set; } = 100; - - /// - /// The fraction of features (chosen randomly) to use on each iteration - /// - [Obsolete] - public double FeatureFraction { get; set; } = 1d; - - /// - /// Number of trees in each bag (0 for disabling bagging) - /// - [Obsolete] - public int BaggingSize { get; set; } - - /// - /// Percentage of training examples used in each bag - /// - [Obsolete] - public double BaggingTrainFraction { get; set; } = 0.7d; - - /// - /// The fraction of features (chosen randomly) to use on each split - /// - [Obsolete] - public double SplitFraction { get; set; } = 1d; - - /// - /// Smoothing paramter for tree regularization - /// - [Obsolete] - public double Smoothing { get; set; } - - /// - /// When a root split is impossible, allow training to proceed - /// - [Obsolete] - public bool AllowEmptyTrees { get; set; } = true; - - /// - /// The level of feature compression to use - /// - [Obsolete] - public int FeatureCompressionLevel { get; set; } = 1; - - /// - /// Compress the tree Ensemble - /// - [Obsolete] - public bool CompressEnsemble { get; set; } = false; - - /// - /// Maximum Number of trees after compression - /// - [Obsolete] - public int MaxTreesAfterCompression { get; set; } = -1; - - /// - /// Print metrics graph for the first test set - /// - [Obsolete] - public bool PrintTestGraph { get; set; } = false; - - /// - /// Print Train and Validation metrics in graph - /// - [Obsolete] - public bool PrintTrainValidGraph { get; set; } = false; - - /// - /// Calculate metric values for train/valid/test every k rounds - /// - [Obsolete] - public int TestFrequency { get; set; } = 2147483647; - - /// - /// Column to use for example groupId - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional GroupIdColumn { get; set; } - - /// - /// Column to use for example weight - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional WeightColumn { get; set; } - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should 
cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - [Obsolete] - internal override string ComponentName => "FastTreeBinaryClassification"; - } - - - - /// - /// Trains gradient boosted decision trees to the LambdaRank quasi-gradient. - /// - [Obsolete] - public sealed class FastTreeRankingFastTreeTrainer : FastTreeTrainer - { - /// - /// Comma seperated list of gains associated to each relevance label. - /// - [Obsolete] - public string CustomGains { get; set; } = "0,3,7,15,31"; - - /// - /// Train DCG instead of NDCG - /// - [Obsolete] - public bool TrainDcg { get; set; } = false; - - /// - /// The sorting algorithm to use for DCG and LambdaMart calculations [DescendingStablePessimistic/DescendingStable/DescendingReverse/DescendingDotNet] - /// - [Obsolete] - public string SortingAlgorithm { get; set; } = "DescendingStablePessimistic"; - - /// - /// max-NDCG truncation to use in the Lambda Mart algorithm - /// - [Obsolete] - public int LambdaMartMaxTruncation { get; set; } = 100; - - /// - /// Use shifted NDCG - /// - [Obsolete] - public bool ShiftedNdcg { get; set; } = false; - - /// - /// Cost function parameter (w/c) - /// - [Obsolete] - public char CostFunctionParam { get; set; } = 'w'; - - /// - /// Distance weight 2 adjustment to cost - /// - [Obsolete] - public bool DistanceWeight2 { get; set; } = false; - - /// - /// Normalize query lambdas - /// - [Obsolete] - public bool NormalizeQueryLambdas { get; set; } = false; - - /// - /// Use best regression step trees? - /// - [Obsolete] - public bool BestStepRankingRegressionTrees { get; set; } = false; - - /// - /// Should we use line search for a step size - /// - [Obsolete] - public bool UseLineSearch { get; set; } = false; - - /// - /// Number of post-bracket line search steps - /// - [Obsolete] - public int NumPostBracketSteps { get; set; } - - /// - /// Minimum line search step size - /// - [Obsolete] - public double MinStepSize { get; set; } - - /// - /// Optimization algorithm to be used (GradientDescent, AcceleratedGradientDescent) - /// - [Obsolete] - public Microsoft.ML.Legacy.Trainers.BoostedTreeArgsOptimizationAlgorithmType OptimizationAlgorithm { get; set; } = Microsoft.ML.Legacy.Trainers.BoostedTreeArgsOptimizationAlgorithmType.GradientDescent; - - /// - /// Early stopping rule. (Validation set (/valid) is required.) - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public EarlyStoppingCriterion EarlyStoppingRule { get; set; } - - /// - /// Early stopping metrics. (For regression, 1: L1, 2:L2; for ranking, 1:NDCG@1, 3:NDCG@3) - /// - [Obsolete] - public int EarlyStoppingMetrics { get; set; } = 1; - - /// - /// Enable post-training pruning to avoid overfitting. 
(a validation set is required) - /// - [Obsolete] - public bool EnablePruning { get; set; } = false; - - /// - /// Use window and tolerance for pruning - /// - [Obsolete] - public bool UseTolerantPruning { get; set; } = false; - - /// - /// The tolerance threshold for pruning - /// - [Obsolete] - public double PruningThreshold { get; set; } = 0.004d; - - /// - /// The moving window size for pruning - /// - [Obsolete] - public int PruningWindowSize { get; set; } = 5; - - /// - /// The learning rate - /// - [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale:true)] - [Obsolete] - public double LearningRates { get; set; } = 0.2d; - - /// - /// Shrinkage - /// - [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale:true)] - [Obsolete] - public double Shrinkage { get; set; } = 1d; - - /// - /// Dropout rate for tree regularization - /// - [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[]{0f, 1E-09f, 0.05f, 0.1f, 0.2f})] - [Obsolete] - public double DropoutRate { get; set; } - - /// - /// Sample each query 1 in k times in the GetDerivatives function - /// - [Obsolete] - public int GetDerivativesSampleRate { get; set; } = 1; - - /// - /// Write the last ensemble instead of the one determined by early stopping - /// - [Obsolete] - public bool WriteLastEnsemble { get; set; } = false; - - /// - /// Upper bound on absolute value of single tree output - /// - [Obsolete] - public double MaxTreeOutput { get; set; } = 100d; - - /// - /// Training starts from random ordering (determined by /r1) - /// - [Obsolete] - public bool RandomStart { get; set; } = false; - - /// - /// Filter zero lambdas during training - /// - [Obsolete] - public bool FilterZeroLambdas { get; set; } = false; - - /// - /// Freeform defining the scores that should be used as the baseline ranker - /// - [Obsolete] - public string BaselineScoresFormula { get; set; } - - /// - /// Baseline alpha for tradeoffs of risk (0 is normal training) - /// - [Obsolete] - public string BaselineAlphaRisk { get; set; } - - /// - /// The discount freeform which specifies the per position discounts of documents in a query (uses a single variable P for position where P=0 is first position) - /// - [Obsolete] - public string PositionDiscountFreeform { get; set; } - - /// - /// Allows to choose Parallel FastTree Learning Algorithm - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public ParallelTraining ParallelTrainer { get; set; } = new SingleParallelTraining(); - - /// - /// The number of threads to use - /// - [Obsolete] - public int? NumThreads { get; set; } - - /// - /// The seed of the random number generator - /// - [Obsolete] - public int RngSeed { get; set; } = 123; - - /// - /// The seed of the active feature selection - /// - [Obsolete] - public int FeatureSelectSeed { get; set; } = 123; - - /// - /// The entropy (regularization) coefficient between 0 and 1 - /// - [Obsolete] - public double EntropyCoefficient { get; set; } - - /// - /// The number of histograms in the pool (between 2 and numLeaves) - /// - [Obsolete] - public int HistogramPoolSize { get; set; } = -1; - - /// - /// Whether to utilize the disk or the data's native transposition facilities (where applicable) when performing the transpose - /// - [Obsolete] - public bool? 
DiskTranspose { get; set; } - - /// - /// Whether to collectivize features during dataset preparation to speed up training - /// - [Obsolete] - public bool FeatureFlocks { get; set; } = true; - - /// - /// Whether to do split based on multiple categorical feature values. - /// - [Obsolete] - public bool CategoricalSplit { get; set; } = false; - - /// - /// Maximum categorical split groups to consider when splitting on a categorical feature. Split groups are a collection of split points. This is used to reduce overfitting when there many categorical features. - /// - [Obsolete] - public int MaxCategoricalGroupsPerNode { get; set; } = 64; - - /// - /// Maximum categorical split points to consider when splitting on a categorical feature. - /// - [Obsolete] - public int MaxCategoricalSplitPoints { get; set; } = 64; - - /// - /// Minimum categorical docs percentage in a bin to consider for a split. - /// - [Obsolete] - public double MinDocsPercentageForCategoricalSplit { get; set; } = 0.001d; - - /// - /// Minimum categorical doc count in a bin to consider for a split. - /// - [Obsolete] - public int MinDocsForCategoricalSplit { get; set; } = 100; - - /// - /// Bias for calculating gradient for each feature bin for a categorical feature. - /// - [Obsolete] - public double Bias { get; set; } - - /// - /// Bundle low population bins. Bundle.None(0): no bundling, Bundle.AggregateLowPopulation(1): Bundle low population, Bundle.Adjacent(2): Neighbor low population bundle. - /// - [Obsolete] - public Microsoft.ML.Legacy.Trainers.Bundle Bundling { get; set; } = Microsoft.ML.Legacy.Trainers.Bundle.None; - - /// - /// Maximum number of distinct values (bins) per feature - /// - [Obsolete] - public int MaxBins { get; set; } = 255; - - /// - /// Sparsity level needed to use sparse feature representation - /// - [Obsolete] - public double SparsifyThreshold { get; set; } = 0.7d; - - /// - /// The feature first use penalty coefficient - /// - [Obsolete] - public double FeatureFirstUsePenalty { get; set; } - - /// - /// The feature re-use penalty (regularization) coefficient - /// - [Obsolete] - public double FeatureReusePenalty { get; set; } - - /// - /// Tree fitting gain confidence requirement (should be in the range [0,1) ). 
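The ranking trainer above optimizes NDCG, mapping each relevance label to a gain through the comma-separated `CustomGains` list (`0,3,7,15,31` by default) and truncating the metric at a fixed depth (`LambdaMartMaxTruncation`). The standard DCG/NDCG computation with that gain table looks like the sketch below; these are reference formulas only, not the FastTree internals:

```
using System;
using System.Linq;

// Sketch of (N)DCG at a truncation depth, using a per-label gain table
// like the CustomGains option above (label 0..4 -> 0,3,7,15,31).
class NdcgSketch
{
    static readonly double[] Gains = { 0, 3, 7, 15, 31 };

    static double Dcg(int[] labelsInRankedOrder, int truncation)
    {
        double dcg = 0;
        int depth = Math.Min(truncation, labelsInRankedOrder.Length);
        for (int pos = 0; pos < depth; pos++)
            dcg += Gains[labelsInRankedOrder[pos]] / Math.Log(pos + 2, 2);
        return dcg;
    }

    static double Ndcg(int[] labelsInRankedOrder, int truncation)
    {
        var ideal = labelsInRankedOrder.OrderByDescending(l => l).ToArray();
        double idealDcg = Dcg(ideal, truncation);
        return idealDcg == 0 ? 0 : Dcg(labelsInRankedOrder, truncation) / idealDcg;
    }

    static void Main()
    {
        int[] ranked = { 2, 3, 0, 1 };   // relevance labels in the model's ranked order
        Console.WriteLine($"NDCG@3 = {Ndcg(ranked, truncation: 3):F3}");
    }
}
```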
- /// - [Obsolete] - public double GainConfidenceLevel { get; set; } - - /// - /// The temperature of the randomized softmax distribution for choosing the feature - /// - [Obsolete] - public double SoftmaxTemperature { get; set; } - - /// - /// Print execution time breakdown to stdout - /// - [Obsolete] - public bool ExecutionTimes { get; set; } = false; - - /// - /// The max number of leaves in each regression tree - /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] - [Obsolete] - public int NumLeaves { get; set; } = 20; - - /// - /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data - /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[]{1, 10, 50})] - [Obsolete] - public int MinDocumentsInLeafs { get; set; } = 10; - - /// - /// Total number of decision trees to create in the ensemble - /// - [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[]{20, 100, 500})] - [Obsolete] - public int NumTrees { get; set; } = 100; - - /// - /// The fraction of features (chosen randomly) to use on each iteration - /// - [Obsolete] - public double FeatureFraction { get; set; } = 1d; - - /// - /// Number of trees in each bag (0 for disabling bagging) - /// - [Obsolete] - public int BaggingSize { get; set; } - - /// - /// Percentage of training examples used in each bag - /// - [Obsolete] - public double BaggingTrainFraction { get; set; } = 0.7d; - - /// - /// The fraction of features (chosen randomly) to use on each split - /// - [Obsolete] - public double SplitFraction { get; set; } = 1d; - - /// - /// Smoothing paramter for tree regularization - /// - [Obsolete] - public double Smoothing { get; set; } - - /// - /// When a root split is impossible, allow training to proceed - /// - [Obsolete] - public bool AllowEmptyTrees { get; set; } = true; - - /// - /// The level of feature compression to use - /// - [Obsolete] - public int FeatureCompressionLevel { get; set; } = 1; - - /// - /// Compress the tree Ensemble - /// - [Obsolete] - public bool CompressEnsemble { get; set; } = false; - - /// - /// Maximum Number of trees after compression - /// - [Obsolete] - public int MaxTreesAfterCompression { get; set; } = -1; - - /// - /// Print metrics graph for the first test set - /// - [Obsolete] - public bool PrintTestGraph { get; set; } = false; - - /// - /// Print Train and Validation metrics in graph - /// - [Obsolete] - public bool PrintTrainValidGraph { get; set; } = false; - - /// - /// Calculate metric values for train/valid/test every k rounds - /// - [Obsolete] - public int TestFrequency { get; set; } = 2147483647; - - /// - /// Column to use for example groupId - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional GroupIdColumn { get; set; } - - /// - /// Column to use for example weight - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional WeightColumn { get; set; } - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should 
cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - [Obsolete] - internal override string ComponentName => "FastTreeRanking"; - } - - - - /// - /// Trains gradient boosted decision trees to fit target values using least-squares. - /// - [Obsolete] - public sealed class FastTreeRegressionFastTreeTrainer : FastTreeTrainer - { - /// - /// Use best regression step trees? - /// - [Obsolete] - public bool BestStepRankingRegressionTrees { get; set; } = false; - - /// - /// Should we use line search for a step size - /// - [Obsolete] - public bool UseLineSearch { get; set; } = false; - - /// - /// Number of post-bracket line search steps - /// - [Obsolete] - public int NumPostBracketSteps { get; set; } - - /// - /// Minimum line search step size - /// - [Obsolete] - public double MinStepSize { get; set; } - - /// - /// Optimization algorithm to be used (GradientDescent, AcceleratedGradientDescent) - /// - [Obsolete] - public Microsoft.ML.Legacy.Trainers.BoostedTreeArgsOptimizationAlgorithmType OptimizationAlgorithm { get; set; } = Microsoft.ML.Legacy.Trainers.BoostedTreeArgsOptimizationAlgorithmType.GradientDescent; - - /// - /// Early stopping rule. (Validation set (/valid) is required.) - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public EarlyStoppingCriterion EarlyStoppingRule { get; set; } - - /// - /// Early stopping metrics. (For regression, 1: L1, 2:L2; for ranking, 1:NDCG@1, 3:NDCG@3) - /// - [Obsolete] - public int EarlyStoppingMetrics { get; set; } = 1; - - /// - /// Enable post-training pruning to avoid overfitting. (a validation set is required) - /// - [Obsolete] - public bool EnablePruning { get; set; } = false; - - /// - /// Use window and tolerance for pruning - /// - [Obsolete] - public bool UseTolerantPruning { get; set; } = false; - - /// - /// The tolerance threshold for pruning - /// - [Obsolete] - public double PruningThreshold { get; set; } = 0.004d; - - /// - /// The moving window size for pruning - /// - [Obsolete] - public int PruningWindowSize { get; set; } = 5; - - /// - /// The learning rate - /// - [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale:true)] - [Obsolete] - public double LearningRates { get; set; } = 0.2d; - - /// - /// Shrinkage - /// - [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale:true)] - [Obsolete] - public double Shrinkage { get; set; } = 1d; - - /// - /// Dropout rate for tree regularization - /// - [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[]{0f, 1E-09f, 0.05f, 0.1f, 0.2f})] - [Obsolete] - public double DropoutRate { get; set; } - - /// - /// Sample each query 1 in k times in the GetDerivatives function - /// - [Obsolete] - public int GetDerivativesSampleRate { get; set; } = 1; - - /// - /// Write the last ensemble instead of the one determined by early stopping - /// - [Obsolete] - public bool WriteLastEnsemble { get; set; } = false; - - /// - /// Upper bound on absolute value of single tree output - /// - [Obsolete] - public double MaxTreeOutput { get; set; } = 100d; - - /// - /// Training starts from random ordering (determined by /r1) - /// - [Obsolete] - public bool RandomStart { get; set; } = false; - - /// - /// Filter zero lambdas during training - /// - [Obsolete] - public bool FilterZeroLambdas { get; set; } = false; - - /// - /// Freeform defining the scores that should be used as the baseline ranker - /// - 
[Obsolete] - public string BaselineScoresFormula { get; set; } - - /// - /// Baseline alpha for tradeoffs of risk (0 is normal training) - /// - [Obsolete] - public string BaselineAlphaRisk { get; set; } - - /// - /// The discount freeform which specifies the per position discounts of documents in a query (uses a single variable P for position where P=0 is first position) - /// - [Obsolete] - public string PositionDiscountFreeform { get; set; } - - /// - /// Allows to choose Parallel FastTree Learning Algorithm - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public ParallelTraining ParallelTrainer { get; set; } = new SingleParallelTraining(); - - /// - /// The number of threads to use - /// - [Obsolete] - public int? NumThreads { get; set; } - - /// - /// The seed of the random number generator - /// - [Obsolete] - public int RngSeed { get; set; } = 123; - - /// - /// The seed of the active feature selection - /// - [Obsolete] - public int FeatureSelectSeed { get; set; } = 123; - - /// - /// The entropy (regularization) coefficient between 0 and 1 - /// - [Obsolete] - public double EntropyCoefficient { get; set; } - - /// - /// The number of histograms in the pool (between 2 and numLeaves) - /// - [Obsolete] - public int HistogramPoolSize { get; set; } = -1; - - /// - /// Whether to utilize the disk or the data's native transposition facilities (where applicable) when performing the transpose - /// - [Obsolete] - public bool? DiskTranspose { get; set; } - - /// - /// Whether to collectivize features during dataset preparation to speed up training - /// - [Obsolete] - public bool FeatureFlocks { get; set; } = true; - - /// - /// Whether to do split based on multiple categorical feature values. - /// - [Obsolete] - public bool CategoricalSplit { get; set; } = false; - - /// - /// Maximum categorical split groups to consider when splitting on a categorical feature. Split groups are a collection of split points. This is used to reduce overfitting when there many categorical features. - /// - [Obsolete] - public int MaxCategoricalGroupsPerNode { get; set; } = 64; - - /// - /// Maximum categorical split points to consider when splitting on a categorical feature. - /// - [Obsolete] - public int MaxCategoricalSplitPoints { get; set; } = 64; - - /// - /// Minimum categorical docs percentage in a bin to consider for a split. - /// - [Obsolete] - public double MinDocsPercentageForCategoricalSplit { get; set; } = 0.001d; - - /// - /// Minimum categorical doc count in a bin to consider for a split. - /// - [Obsolete] - public int MinDocsForCategoricalSplit { get; set; } = 100; - - /// - /// Bias for calculating gradient for each feature bin for a categorical feature. - /// - [Obsolete] - public double Bias { get; set; } - - /// - /// Bundle low population bins. Bundle.None(0): no bundling, Bundle.AggregateLowPopulation(1): Bundle low population, Bundle.Adjacent(2): Neighbor low population bundle. 
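
The categorical-split thresholds and the Bundle enum described just above can be summarized in a small sketch. This is one illustrative reading of MinDocsForCategoricalSplit, MinDocsPercentageForCategoricalSplit and Bundle.AggregateLowPopulation, not the trainer's actual implementation.

```csharp
using System.Collections.Generic;

static class CategoricalSplitSketch
{
    // A categorical value (bin) is only considered for its own split group if it has
    // enough documents, both as an absolute count and as a fraction of the node.
    public static bool IsEligibleForSplit(int docsInBin, int docsInNode,
        int minDocsForCategoricalSplit = 100, double minDocsPercentage = 0.001)
    {
        return docsInBin >= minDocsForCategoricalSplit
            && docsInBin >= minDocsPercentage * docsInNode;
    }

    // Bundle.AggregateLowPopulation: fold every ineligible bin into a single "other" bucket.
    public static Dictionary<string, int> AggregateLowPopulation(
        Dictionary<string, int> docsPerCategory, int docsInNode)
    {
        var bundled = new Dictionary<string, int> { ["<other>"] = 0 };
        foreach (var kvp in docsPerCategory)
        {
            if (IsEligibleForSplit(kvp.Value, docsInNode))
                bundled[kvp.Key] = kvp.Value;
            else
                bundled["<other>"] += kvp.Value;
        }
        return bundled;
    }
}
```
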
- /// - [Obsolete] - public Microsoft.ML.Legacy.Trainers.Bundle Bundling { get; set; } = Microsoft.ML.Legacy.Trainers.Bundle.None; - - /// - /// Maximum number of distinct values (bins) per feature - /// - [Obsolete] - public int MaxBins { get; set; } = 255; - - /// - /// Sparsity level needed to use sparse feature representation - /// - [Obsolete] - public double SparsifyThreshold { get; set; } = 0.7d; - - /// - /// The feature first use penalty coefficient - /// - [Obsolete] - public double FeatureFirstUsePenalty { get; set; } - - /// - /// The feature re-use penalty (regularization) coefficient - /// - [Obsolete] - public double FeatureReusePenalty { get; set; } - - /// - /// Tree fitting gain confidence requirement (should be in the range [0,1) ). - /// - [Obsolete] - public double GainConfidenceLevel { get; set; } - - /// - /// The temperature of the randomized softmax distribution for choosing the feature - /// - [Obsolete] - public double SoftmaxTemperature { get; set; } - - /// - /// Print execution time breakdown to stdout - /// - [Obsolete] - public bool ExecutionTimes { get; set; } = false; - - /// - /// The max number of leaves in each regression tree - /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] - [Obsolete] - public int NumLeaves { get; set; } = 20; - - /// - /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data - /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[]{1, 10, 50})] - [Obsolete] - public int MinDocumentsInLeafs { get; set; } = 10; - - /// - /// Total number of decision trees to create in the ensemble - /// - [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[]{20, 100, 500})] - [Obsolete] - public int NumTrees { get; set; } = 100; - - /// - /// The fraction of features (chosen randomly) to use on each iteration - /// - [Obsolete] - public double FeatureFraction { get; set; } = 1d; - - /// - /// Number of trees in each bag (0 for disabling bagging) - /// - [Obsolete] - public int BaggingSize { get; set; } - - /// - /// Percentage of training examples used in each bag - /// - [Obsolete] - public double BaggingTrainFraction { get; set; } = 0.7d; - - /// - /// The fraction of features (chosen randomly) to use on each split - /// - [Obsolete] - public double SplitFraction { get; set; } = 1d; - - /// - /// Smoothing paramter for tree regularization - /// - [Obsolete] - public double Smoothing { get; set; } - - /// - /// When a root split is impossible, allow training to proceed - /// - [Obsolete] - public bool AllowEmptyTrees { get; set; } = true; - - /// - /// The level of feature compression to use - /// - [Obsolete] - public int FeatureCompressionLevel { get; set; } = 1; - - /// - /// Compress the tree Ensemble - /// - [Obsolete] - public bool CompressEnsemble { get; set; } = false; - - /// - /// Maximum Number of trees after compression - /// - [Obsolete] - public int MaxTreesAfterCompression { get; set; } = -1; - - /// - /// Print metrics graph for the first test set - /// - [Obsolete] - public bool PrintTestGraph { get; set; } = false; - - /// - /// Print Train and Validation metrics in graph - /// - [Obsolete] - public bool PrintTrainValidGraph { get; set; } = false; - - /// - /// Calculate metric values for train/valid/test every k rounds - /// - [Obsolete] - public int TestFrequency { get; set; } = 2147483647; - - /// - /// Column to use for example groupId - /// - [Obsolete] - public 
Microsoft.ML.EntryPoints.Optional GroupIdColumn { get; set; } - - /// - /// Column to use for example weight - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional WeightColumn { get; set; } - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - [Obsolete] - internal override string ComponentName => "FastTreeRegression"; - } - - - - /// - /// Trains gradient boosted decision trees to fit target values using a Tweedie loss function. This learner is a generalization of Poisson, compound Poisson, and gamma regression. - /// - [Obsolete] - public sealed class FastTreeTweedieRegressionFastTreeTrainer : FastTreeTrainer - { - /// - /// Index parameter for the Tweedie distribution, in the range [1, 2]. 1 is Poisson loss, 2 is gamma loss, and intermediate values are compound Poisson loss. - /// - [Obsolete] - public double Index { get; set; } = 1.5d; - - /// - /// Use best regression step trees? - /// - [Obsolete] - public bool BestStepRankingRegressionTrees { get; set; } = false; - - /// - /// Should we use line search for a step size - /// - [Obsolete] - public bool UseLineSearch { get; set; } = false; - - /// - /// Number of post-bracket line search steps - /// - [Obsolete] - public int NumPostBracketSteps { get; set; } - - /// - /// Minimum line search step size - /// - [Obsolete] - public double MinStepSize { get; set; } - - /// - /// Optimization algorithm to be used (GradientDescent, AcceleratedGradientDescent) - /// - [Obsolete] - public Microsoft.ML.Legacy.Trainers.BoostedTreeArgsOptimizationAlgorithmType OptimizationAlgorithm { get; set; } = Microsoft.ML.Legacy.Trainers.BoostedTreeArgsOptimizationAlgorithmType.GradientDescent; - - /// - /// Early stopping rule. (Validation set (/valid) is required.) - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public EarlyStoppingCriterion EarlyStoppingRule { get; set; } - - /// - /// Early stopping metrics. (For regression, 1: L1, 2:L2; for ranking, 1:NDCG@1, 3:NDCG@3) - /// - [Obsolete] - public int EarlyStoppingMetrics { get; set; } - - /// - /// Enable post-training pruning to avoid overfitting. 
(a validation set is required) - /// - [Obsolete] - public bool EnablePruning { get; set; } = false; - - /// - /// Use window and tolerance for pruning - /// - [Obsolete] - public bool UseTolerantPruning { get; set; } = false; - - /// - /// The tolerance threshold for pruning - /// - [Obsolete] - public double PruningThreshold { get; set; } = 0.004d; - - /// - /// The moving window size for pruning - /// - [Obsolete] - public int PruningWindowSize { get; set; } = 5; - - /// - /// The learning rate - /// - [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale:true)] - [Obsolete] - public double LearningRates { get; set; } = 0.2d; - - /// - /// Shrinkage - /// - [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale:true)] - [Obsolete] - public double Shrinkage { get; set; } = 1d; - - /// - /// Dropout rate for tree regularization - /// - [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[]{0f, 1E-09f, 0.05f, 0.1f, 0.2f})] - [Obsolete] - public double DropoutRate { get; set; } - - /// - /// Sample each query 1 in k times in the GetDerivatives function - /// - [Obsolete] - public int GetDerivativesSampleRate { get; set; } = 1; - - /// - /// Write the last ensemble instead of the one determined by early stopping - /// - [Obsolete] - public bool WriteLastEnsemble { get; set; } = false; - - /// - /// Upper bound on absolute value of single tree output - /// - [Obsolete] - public double MaxTreeOutput { get; set; } = 100d; - - /// - /// Training starts from random ordering (determined by /r1) - /// - [Obsolete] - public bool RandomStart { get; set; } = false; - - /// - /// Filter zero lambdas during training - /// - [Obsolete] - public bool FilterZeroLambdas { get; set; } = false; - - /// - /// Freeform defining the scores that should be used as the baseline ranker - /// - [Obsolete] - public string BaselineScoresFormula { get; set; } - - /// - /// Baseline alpha for tradeoffs of risk (0 is normal training) - /// - [Obsolete] - public string BaselineAlphaRisk { get; set; } - - /// - /// The discount freeform which specifies the per position discounts of documents in a query (uses a single variable P for position where P=0 is first position) - /// - [Obsolete] - public string PositionDiscountFreeform { get; set; } - - /// - /// Allows to choose Parallel FastTree Learning Algorithm - /// - [JsonConverter(typeof(ComponentSerializer))] - [Obsolete] - public ParallelTraining ParallelTrainer { get; set; } = new SingleParallelTraining(); - - /// - /// The number of threads to use - /// - [Obsolete] - public int? NumThreads { get; set; } - - /// - /// The seed of the random number generator - /// - [Obsolete] - public int RngSeed { get; set; } = 123; - - /// - /// The seed of the active feature selection - /// - [Obsolete] - public int FeatureSelectSeed { get; set; } = 123; - - /// - /// The entropy (regularization) coefficient between 0 and 1 - /// - [Obsolete] - public double EntropyCoefficient { get; set; } - - /// - /// The number of histograms in the pool (between 2 and numLeaves) - /// - [Obsolete] - public int HistogramPoolSize { get; set; } = -1; - - /// - /// Whether to utilize the disk or the data's native transposition facilities (where applicable) when performing the transpose - /// - [Obsolete] - public bool? 
DiskTranspose { get; set; } - - /// - /// Whether to collectivize features during dataset preparation to speed up training - /// - [Obsolete] - public bool FeatureFlocks { get; set; } = true; - - /// - /// Whether to do split based on multiple categorical feature values. - /// - [Obsolete] - public bool CategoricalSplit { get; set; } = false; - - /// - /// Maximum categorical split groups to consider when splitting on a categorical feature. Split groups are a collection of split points. This is used to reduce overfitting when there many categorical features. - /// - [Obsolete] - public int MaxCategoricalGroupsPerNode { get; set; } = 64; - - /// - /// Maximum categorical split points to consider when splitting on a categorical feature. - /// - [Obsolete] - public int MaxCategoricalSplitPoints { get; set; } = 64; - - /// - /// Minimum categorical docs percentage in a bin to consider for a split. - /// - [Obsolete] - public double MinDocsPercentageForCategoricalSplit { get; set; } = 0.001d; - - /// - /// Minimum categorical doc count in a bin to consider for a split. - /// - [Obsolete] - public int MinDocsForCategoricalSplit { get; set; } = 100; - - /// - /// Bias for calculating gradient for each feature bin for a categorical feature. - /// - [Obsolete] - public double Bias { get; set; } - - /// - /// Bundle low population bins. Bundle.None(0): no bundling, Bundle.AggregateLowPopulation(1): Bundle low population, Bundle.Adjacent(2): Neighbor low population bundle. - /// - [Obsolete] - public Microsoft.ML.Legacy.Trainers.Bundle Bundling { get; set; } = Microsoft.ML.Legacy.Trainers.Bundle.None; - - /// - /// Maximum number of distinct values (bins) per feature - /// - [Obsolete] - public int MaxBins { get; set; } = 255; - - /// - /// Sparsity level needed to use sparse feature representation - /// - [Obsolete] - public double SparsifyThreshold { get; set; } = 0.7d; - - /// - /// The feature first use penalty coefficient - /// - [Obsolete] - public double FeatureFirstUsePenalty { get; set; } - - /// - /// The feature re-use penalty (regularization) coefficient - /// - [Obsolete] - public double FeatureReusePenalty { get; set; } - - /// - /// Tree fitting gain confidence requirement (should be in the range [0,1) ). 
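
The pruning options repeated here (EnablePruning, UseTolerantPruning, PruningThreshold = 0.004, PruningWindowSize = 5) amount to a stopping rule over validation error. The sketch below is one plausible reading of "use window and tolerance for pruning"; the exact rule FastTree applies may differ.

```csharp
using System;
using System.Collections.Generic;
using System.Linq;

sealed class TolerantPruningSketch
{
    private readonly double _threshold;
    private readonly int _windowSize;
    private readonly List<double> _validationErrors = new List<double>();
    private double _bestError = double.PositiveInfinity;

    public TolerantPruningSketch(double threshold = 0.004, int windowSize = 5)
    {
        _threshold = threshold;
        _windowSize = windowSize;
    }

    // Call once per boosting iteration with the error on the validation set.
    // Returns true when the ensemble should be pruned back to the best iteration so far.
    public bool ShouldPrune(double validationError)
    {
        _validationErrors.Add(validationError);
        _bestError = Math.Min(_bestError, validationError);

        if (_validationErrors.Count < _windowSize)
            return false;

        double windowAverage = _validationErrors
            .Skip(_validationErrors.Count - _windowSize)
            .Average();

        // Prune once the recent average error drifts above the best error by more than
        // the (relative) tolerance threshold, i.e. additional trees are making things worse.
        return windowAverage > _bestError * (1 + _threshold);
    }
}
```
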
- /// - [Obsolete] - public double GainConfidenceLevel { get; set; } - - /// - /// The temperature of the randomized softmax distribution for choosing the feature - /// - [Obsolete] - public double SoftmaxTemperature { get; set; } - - /// - /// Print execution time breakdown to stdout - /// - [Obsolete] - public bool ExecutionTimes { get; set; } = false; - - /// - /// The max number of leaves in each regression tree - /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] - [Obsolete] - public int NumLeaves { get; set; } = 20; - - /// - /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data - /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[]{1, 10, 50})] - [Obsolete] - public int MinDocumentsInLeafs { get; set; } = 10; - - /// - /// Total number of decision trees to create in the ensemble - /// - [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[]{20, 100, 500})] - [Obsolete] - public int NumTrees { get; set; } = 100; - - /// - /// The fraction of features (chosen randomly) to use on each iteration - /// - [Obsolete] - public double FeatureFraction { get; set; } = 1d; - - /// - /// Number of trees in each bag (0 for disabling bagging) - /// - [Obsolete] - public int BaggingSize { get; set; } - - /// - /// Percentage of training examples used in each bag - /// - [Obsolete] - public double BaggingTrainFraction { get; set; } = 0.7d; - - /// - /// The fraction of features (chosen randomly) to use on each split - /// - [Obsolete] - public double SplitFraction { get; set; } = 1d; - - /// - /// Smoothing paramter for tree regularization - /// - [Obsolete] - public double Smoothing { get; set; } - - /// - /// When a root split is impossible, allow training to proceed - /// - [Obsolete] - public bool AllowEmptyTrees { get; set; } = true; - - /// - /// The level of feature compression to use - /// - [Obsolete] - public int FeatureCompressionLevel { get; set; } = 1; - - /// - /// Compress the tree Ensemble - /// - [Obsolete] - public bool CompressEnsemble { get; set; } = false; - - /// - /// Maximum Number of trees after compression - /// - [Obsolete] - public int MaxTreesAfterCompression { get; set; } = -1; - - /// - /// Print metrics graph for the first test set - /// - [Obsolete] - public bool PrintTestGraph { get; set; } = false; - - /// - /// Print Train and Validation metrics in graph - /// - [Obsolete] - public bool PrintTrainValidGraph { get; set; } = false; - - /// - /// Calculate metric values for train/valid/test every k rounds - /// - [Obsolete] - public int TestFrequency { get; set; } = 2147483647; - - /// - /// Column to use for example groupId - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional GroupIdColumn { get; set; } - - /// - /// Column to use for example weight - /// - [Obsolete] - public Microsoft.ML.EntryPoints.Optional WeightColumn { get; set; } - - /// - /// Column to use for labels - /// - [Obsolete] - public string LabelColumn { get; set; } = "Label"; - - /// - /// The data to be used for training - /// - [Obsolete] - public Var TrainingData { get; set; } = new Var(); - - /// - /// Column to use for features - /// - [Obsolete] - public string FeatureColumn { get; set; } = "Features"; - - /// - /// Normalize option for the feature column - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Legacy.Models.NormalizeOption.Auto; - - /// - /// Whether learner should 
cache input training data - /// - [Obsolete] - public Microsoft.ML.Legacy.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Legacy.Models.CachingOptions.Auto; - - [Obsolete] - internal override string ComponentName => "FastTreeTweedieRegression"; - } - - [Obsolete] - public abstract class NgramExtractor : ComponentKind {} - - - - /// - /// Extracts NGrams from text and convert them to vector using dictionary. - /// - [Obsolete] - public sealed class NGramNgramExtractor : NgramExtractor - { - /// - /// Ngram length - /// - [Obsolete] - public int NgramLength { get; set; } = 1; - - /// - /// Maximum number of tokens to skip when constructing an ngram - /// - [Obsolete] - public int SkipLength { get; set; } - - /// - /// Whether to include all ngram lengths up to NgramLength or only NgramLength - /// - [Obsolete] - public bool AllLengths { get; set; } = true; - - /// - /// Maximum number of ngrams to store in the dictionary - /// - [Obsolete] - public int[] MaxNumTerms { get; set; } = { 10000000 }; - - /// - /// The weighting criteria - /// - [Obsolete] - public Microsoft.ML.Legacy.Transforms.NgramExtractingEstimatorWeightingCriteria Weighting { get; set; } = Microsoft.ML.Legacy.Transforms.NgramExtractingEstimatorWeightingCriteria.Tf; - - [Obsolete] - internal override string ComponentName => "NGram"; - } - - - - /// - /// Extracts NGrams from text and convert them to vector using hashing trick. - /// - [Obsolete] - public sealed class NGramHashNgramExtractor : NgramExtractor - { - /// - /// Ngram length - /// - [Obsolete] - public int NgramLength { get; set; } = 1; - - /// - /// Maximum number of tokens to skip when constructing an ngram - /// - [Obsolete] - public int SkipLength { get; set; } - - /// - /// Number of bits to hash into. Must be between 1 and 30, inclusive. - /// - [Obsolete] - public int HashBits { get; set; } = 16; - - /// - /// Hashing seed - /// - [Obsolete] - public uint Seed { get; set; } = 314489979; - - /// - /// Whether the position of each source column should be included in the hash (when there are multiple source columns). - /// - [Obsolete] - public bool Ordered { get; set; } = true; - - /// - /// Limit the number of keys used to generate the slot name to this many. 0 means no invert hashing, -1 means no limit. - /// - [Obsolete] - public int InvertHash { get; set; } - - /// - /// Whether to include all ngram lengths up to ngramLength or only ngramLength - /// - [Obsolete] - public bool AllLengths { get; set; } = true; - - [Obsolete] - internal override string ComponentName => "NGramHash"; - } - - [Obsolete] - public abstract class ParallelLightGBM : ComponentKind {} - - - - /// - /// Single node machine learning process. - /// - [Obsolete] - public sealed class SingleParallelLightGBM : ParallelLightGBM - { - [Obsolete] - internal override string ComponentName => "Single"; - } - - [Obsolete] - public abstract class ParallelTraining : ComponentKind {} - - - - /// - /// Single node machine learning process. - /// - [Obsolete] - public sealed class SingleParallelTraining : ParallelTraining - { - [Obsolete] - internal override string ComponentName => "Single"; - } - - [Obsolete] - public abstract class PartitionedPathParser : ComponentKind {} - - - - /// - /// Extract name/value pairs from Parquet formatted directory names. 
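
Returning to the NGram/NGramHash extractor options defined above: NgramLength, SkipLength and HashBits are clearest with a small example. The sketch below enumerates skip-grams and maps them into 2^HashBits slots; the hash used here is a stand-in for illustration only (the transform uses its own seeded hash, controlled by Seed and Ordered).

```csharp
using System;
using System.Collections.Generic;

static class NgramHashSketch
{
    // Enumerate n-grams of exactly 'ngramLength' tokens, allowing up to 'skipLength'
    // tokens in total to be skipped between the chosen tokens (a "skip-gram").
    public static IEnumerable<string> SkipGrams(string[] tokens, int ngramLength, int skipLength)
    {
        for (int start = 0; start < tokens.Length; start++)
            foreach (var tail in Grow(tokens, start, ngramLength - 1, skipLength))
                yield return tokens[start] + tail;
    }

    private static IEnumerable<string> Grow(string[] tokens, int last, int remaining, int skipsLeft)
    {
        if (remaining == 0)
        {
            yield return string.Empty;
            yield break;
        }
        for (int skip = 0; skip <= skipsLeft; skip++)
        {
            int next = last + 1 + skip;
            if (next >= tokens.Length)
                yield break;
            foreach (var tail in Grow(tokens, next, remaining - 1, skipsLeft - skip))
                yield return "|" + tokens[next] + tail;
        }
    }

    // Map an n-gram to one of 2^hashBits feature slots.
    public static int Slot(string ngram, int hashBits = 16)
    {
        int mask = (1 << hashBits) - 1;
        return ngram.GetHashCode() & mask;
    }
}
```

For tokens ["the", "quick", "brown", "fox"] with ngramLength 2 and skipLength 1 this yields the|quick, the|brown, quick|brown, quick|fox and brown|fox.
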
Example path: Year=2018/Month=12/data1.parquet - /// - [Obsolete] - public sealed class ParquetPathParserPartitionedPathParser : PartitionedPathParser - { - [Obsolete] - internal override string ComponentName => "ParquetPathParser"; - } - - - [Obsolete] - public sealed partial class PartitionedFileLoaderColumn - { - /// - /// Name of the column. - /// - [Obsolete] - public string Name { get; set; } - - /// - /// Data type of the column. - /// - [Obsolete] - public Microsoft.ML.Legacy.Data.DataKind? Type { get; set; } - - /// - /// Index of the directory representing this column. - /// - [Obsolete] - public int Source { get; set; } - - } - - - /// - /// A simple parser that extracts directory names as column values. Column names are defined as arguments. - /// - [Obsolete] - public sealed class SimplePathParserPartitionedPathParser : PartitionedPathParser - { - /// - /// Column definitions used to override the Partitioned Path Parser. Expected with the format name:type:numeric-source, for example, col=MyFeature:R4:1 - /// - [Obsolete] - public PartitionedFileLoaderColumn[] Columns { get; set; } - - /// - /// Data type of each column. - /// - [Obsolete] - public Microsoft.ML.Legacy.Data.DataKind Type { get; set; } = Microsoft.ML.Legacy.Data.DataKind.TX; - - [Obsolete] - internal override string ComponentName => "SimplePathParser"; - } - - [Obsolete] - public abstract class RegressionLossFunction : ComponentKind {} - - - - /// - /// Poisson loss. - /// - [Obsolete] - public sealed class PoissonLossRegressionLossFunction : RegressionLossFunction - { - [Obsolete] - internal override string ComponentName => "PoissonLoss"; - } - - - - /// - /// Squared loss. - /// - [Obsolete] - public sealed class SquaredLossRegressionLossFunction : RegressionLossFunction - { - [Obsolete] - internal override string ComponentName => "SquaredLoss"; - } - - - - /// - /// Tweedie loss. - /// - [Obsolete] - public sealed class TweedieLossRegressionLossFunction : RegressionLossFunction - { - /// - /// Index parameter for the Tweedie distribution, in the range [1, 2]. 1 is Poisson loss, 2 is gamma loss, and intermediate values are compound Poisson loss. - /// - [Obsolete] - public double Index { get; set; } = 1.5d; - - [Obsolete] - internal override string ComponentName => "TweedieLoss"; - } - - [Obsolete] - public abstract class SDCAClassificationLossFunction : ComponentKind {} - - - - /// - /// Hinge loss. - /// - [Obsolete] - public sealed class HingeLossSDCAClassificationLossFunction : SDCAClassificationLossFunction - { - /// - /// Margin value - /// - [Obsolete] - public float Margin { get; set; } = 1f; - - [Obsolete] - internal override string ComponentName => "HingeLoss"; - } - - - - /// - /// Log loss. - /// - [Obsolete] - public sealed class LogLossSDCAClassificationLossFunction : SDCAClassificationLossFunction - { - [Obsolete] - internal override string ComponentName => "LogLoss"; - } - - - - /// - /// Smoothed Hinge loss. - /// - [Obsolete] - public sealed class SmoothedHingeLossSDCAClassificationLossFunction : SDCAClassificationLossFunction - { - /// - /// Smoothing constant - /// - [Obsolete] - public float SmoothingConst { get; set; } = 1f; - - [Obsolete] - internal override string ComponentName => "SmoothedHingeLoss"; - } - - [Obsolete] - public abstract class SDCARegressionLossFunction : ComponentKind {} - - - - /// - /// Squared loss. 
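
The loss-function components listed above (hinge with a margin, smoothed hinge with a smoothing constant, log loss, squared loss, Poisson and Tweedie) correspond to standard textbook losses. The sketch below writes them out in that standard form; the library's internal scaling and parameterization may differ.

```csharp
using System;

static class LossSketch
{
    // Classification losses below take z = label * score, with label in {-1, +1}.
    public static double Hinge(double z, double margin = 1.0)
        => Math.Max(0.0, margin - z);

    public static double SmoothedHinge(double z, double smoothingConst = 1.0)
    {
        if (z >= 1.0) return 0.0;
        if (z <= 1.0 - smoothingConst) return 1.0 - z - smoothingConst / 2.0;
        return (1.0 - z) * (1.0 - z) / (2.0 * smoothingConst);
    }

    public static double LogLoss(double z)
        => Math.Log(1.0 + Math.Exp(-z));

    // Regression losses: label y and prediction mu (mu > 0 for Poisson/Tweedie).
    public static double Squared(double y, double mu)
        => (y - mu) * (y - mu);

    // Negative Poisson log-likelihood, up to an additive constant in y.
    public static double Poisson(double y, double mu)
        => mu - y * Math.Log(mu);

    // Tweedie deviance, valid for 1 < index < 2; the endpoints reduce to the
    // Poisson and gamma deviances and need their own limiting forms.
    public static double TweedieDeviance(double y, double mu, double index = 1.5)
    {
        double p = index;
        return 2.0 * (Math.Pow(y, 2.0 - p) / ((1.0 - p) * (2.0 - p))
                      - y * Math.Pow(mu, 1.0 - p) / (1.0 - p)
                      + Math.Pow(mu, 2.0 - p) / (2.0 - p));
    }
}
```
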
- /// - [Obsolete] - public sealed class SquaredLossSDCARegressionLossFunction : SDCARegressionLossFunction - { - [Obsolete] - internal override string ComponentName => "SquaredLoss"; - } - - [Obsolete] - public abstract class StopWordsRemover : ComponentKind {} - - - - /// - /// Remover with list of stopwords specified by the user. - /// - [Obsolete] - public sealed class CustomStopWordsRemover : StopWordsRemover - { - /// - /// List of stopwords - /// - [Obsolete] - public string[] Stopword { get; set; } - - [Obsolete] - internal override string ComponentName => "Custom"; - } - - - - /// - /// Remover with predefined list of stop words. - /// - [Obsolete] - public sealed class PredefinedStopWordsRemover : StopWordsRemover - { - [Obsolete] - internal override string ComponentName => "Predefined"; - } - -} -#pragma warning restore diff --git a/src/Microsoft.ML.Legacy/Data/CollectionDataSource.cs b/src/Microsoft.ML.Legacy/Data/CollectionDataSource.cs deleted file mode 100644 index 33f973c411..0000000000 --- a/src/Microsoft.ML.Legacy/Data/CollectionDataSource.cs +++ /dev/null @@ -1,103 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using System.Collections.Generic; -using Microsoft.ML.Data; -using Microsoft.ML.EntryPoints; -using Microsoft.ML.Internal.Utilities; - -namespace Microsoft.ML.Legacy.Data -{ - /// - /// Creates data source for pipeline based on provided collection of data. - /// - [Obsolete] - public static class CollectionDataSource - { - /// - /// Creates pipeline data source. Support shuffle. - /// - public static ILearningPipelineLoader Create(IList data) where T : class - { - return new ListDataSource(data); - } - - /// - /// Creates pipeline data source which can't be shuffled. 
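
The CollectionDataSource wrappers being deleted here are thin shims: as the implementation further down in this file shows, they forward to ComponentCreation.CreateDataView (IList source, shuffle-capable) and ComponentCreation.CreateStreamingDataView (IEnumerable source, forward-only). A minimal sketch of calling those helpers directly is below; the InputRow type is a placeholder and the using directives are a best guess based on the deleted file.

```csharp
using System.Collections.Generic;
using Microsoft.ML;
using Microsoft.ML.Data;

public sealed class InputRow
{
    public float Feature1 { get; set; }
    public string Text { get; set; }
}

static class InMemoryDataSketch
{
    public static IDataView Load(IHostEnvironment env, IList<InputRow> rows)
    {
        // IList source: random access, so the resulting view supports shuffling.
        return ComponentCreation.CreateDataView(env, rows);
    }

    public static IDataView LoadStreaming(IHostEnvironment env, IEnumerable<InputRow> rows)
    {
        // IEnumerable source: forward-only, so the resulting view cannot be shuffled.
        return ComponentCreation.CreateStreamingDataView(env, rows);
    }
}
```
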
- /// - public static ILearningPipelineLoader Create(IEnumerable data) where T : class - { - return new EnumerableDataSource(data); - } - - private abstract class BaseDataSource : ILearningPipelineLoader where TInput : class - { - private Data.DataViewReference _dataViewEntryPoint; - private IDataView _dataView; - - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - Contracts.Assert(previousStep == null); - _dataViewEntryPoint = new Data.DataViewReference(); - var importOutput = experiment.Add(_dataViewEntryPoint); - return new CollectionDataSourcePipelineStep(importOutput.Data); - } - - public void SetInput(IHostEnvironment environment, Experiment experiment) - { - _dataView = GetDataView(environment); - environment.CheckValue(_dataView, nameof(_dataView)); - experiment.SetInput(_dataViewEntryPoint.Data, _dataView); - } - - public Var GetInputData() => null; - - public abstract IDataView GetDataView(IHostEnvironment environment); - } - - private class EnumerableDataSource : BaseDataSource where TInput : class - { - private readonly IEnumerable _enumerableCollection; - - public EnumerableDataSource(IEnumerable collection) - { - Contracts.CheckValue(collection, nameof(collection)); - _enumerableCollection = collection; - } - - public override IDataView GetDataView(IHostEnvironment environment) - { - return ComponentCreation.CreateStreamingDataView(environment, _enumerableCollection); - } - } - - private class ListDataSource : BaseDataSource where TInput : class - { - private readonly IList _listCollection; - - public ListDataSource(IList collection) - { - Contracts.CheckParamValue(Utils.Size(collection) > 0, collection, nameof(collection), "Must be non-empty"); - _listCollection = collection; - } - - public override IDataView GetDataView(IHostEnvironment environment) - { - return ComponentCreation.CreateDataView(environment, _listCollection); - } - } - - private class CollectionDataSourcePipelineStep : ILearningPipelineDataStep - { - public CollectionDataSourcePipelineStep(Var data) - { - Data = data; - } - - public Var Data { get; } - public Var Model => null; - } - } -} diff --git a/src/Microsoft.ML.Legacy/Data/TextLoader.cs b/src/Microsoft.ML.Legacy/Data/TextLoader.cs deleted file mode 100644 index 0800bd3bf6..0000000000 --- a/src/Microsoft.ML.Legacy/Data/TextLoader.cs +++ /dev/null @@ -1,202 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using System.Collections.Generic; -using System.Linq; -using System.Reflection; -using System.Text.RegularExpressions; -using Microsoft.ML.Data; - -namespace Microsoft.ML.Legacy.Data -{ - public sealed partial class TextLoaderRange - { - public TextLoaderRange() - { - } - - /// - /// Convenience constructor for the scalar case, when a given column - /// in the schema spans only a single column in the dataset. - /// and are set to the single value . - /// - /// Column index in the dataset. - public TextLoaderRange(int ordinal) - { - - Contracts.CheckParam(ordinal >= 0, nameof(ordinal), "Cannot be a negative number"); - - Min = ordinal; - Max = ordinal; - } - - /// - /// Convenience constructor for the vector case, when a given column - /// in the schema spans contiguous columns in the dataset. - /// - /// Starting column index in the dataset. - /// Ending column index in the dataset. 
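
For orientation before the constructor body that follows: the scalar constructor pins a schema column to a single dataset column, while the (min, max) constructor spans a contiguous block of dataset columns. A minimal sketch with the removed types, using placeholder names and indices:

```csharp
// Scalar: "Label" is read from dataset column 0.
var labelColumn = new TextLoaderColumn
{
    Name = "Label",
    Source = new[] { new TextLoaderRange(0) }
};

// Vector: "Features" spans dataset columns 1 through 9 (contiguous).
var featuresColumn = new TextLoaderColumn
{
    Name = "Features",
    Source = new[] { new TextLoaderRange(1, 9) }
};
```
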
- public TextLoaderRange(int min, int max) - { - - Contracts.CheckParam(min >= 0, nameof(min), "Cannot be a negative number."); - Contracts.CheckParam(max >= min, nameof(max), "Cannot be less than " + nameof(min) + "."); - - Min = min; - Max = max; - } - } - - public sealed partial class TextLoader - { - /// - /// Construct a TextLoader object by inferencing the dataset schema from a type. - /// - /// Does the file contains header? - /// Column separator character. Default is '\t' - /// Whether the input may include quoted values, - /// which can contain separator characters, colons, - /// and distinguish empty values from missing values. When true, consecutive separators - /// denote a missing value and an empty value is denoted by \"\". - /// When false, consecutive separators denote an empty value. - /// Whether the input may include sparse representations for example, - /// if one of the row contains "5 2:6 4:3" that's mean there are 5 columns all zero - /// except for 3rd and 5th columns which have values 6 and 3 - /// Remove trailing whitespace from lines - public TextLoader CreateFrom(bool useHeader = false, - char separator = '\t', bool allowQuotedStrings = true, - bool supportSparse = true, bool trimWhitespace = false) - { - var userType = typeof(TInput); - - var fieldInfos = userType.GetFields(BindingFlags.Public | BindingFlags.Instance); - - var propertyInfos = - userType - .GetProperties(BindingFlags.Public | BindingFlags.Instance) - .Where(x => x.CanRead && x.CanWrite && x.GetGetMethod() != null && x.GetSetMethod() != null && x.GetIndexParameters().Length == 0); - - var memberInfos = (fieldInfos as IEnumerable).Concat(propertyInfos).ToArray(); - - Arguments.Column = new TextLoaderColumn[memberInfos.Length]; - for (int index = 0; index < memberInfos.Length; index++) - { - var memberInfo = memberInfos[index]; - var mappingAttr = memberInfo.GetCustomAttribute(); - if (mappingAttr == null) - throw Contracts.Except($"Field or property {memberInfo.Name} is missing LoadColumnAttributeAttribute"); -#pragma warning disable 618 - if (Regex.Match(mappingAttr.Start, @"[^(0-9,\*\-~)]+").Success) - throw Contracts.Except($"{mappingAttr.Start} contains invalid characters. " + - $"Valid characters are 0-9, *, - and ~"); - - var mappingNameAttr = memberInfo.GetCustomAttribute(); - var name = mappingNameAttr?.Name ?? memberInfo.Name; - - ML.Data.TextLoader.Range[] sources; - if (!ML.Data.TextLoader.Column.TryParseSourceEx(mappingAttr.Start, out sources)) - throw Contracts.Except($"{mappingAttr.Start} could not be parsed."); -#pragma warning restore 618 - Contracts.Assert(sources != null); - - TextLoaderColumn tlc = new TextLoaderColumn(); - tlc.Name = name; - tlc.Source = new TextLoaderRange[sources.Length]; - DataKind dk; - switch (memberInfo) - { - case FieldInfo field: - if (!TryGetDataKind(field.FieldType.IsArray ? field.FieldType.GetElementType() : field.FieldType, out dk)) - throw Contracts.Except($"Field {name} is of unsupported type."); - - break; - - case PropertyInfo property: - if (!TryGetDataKind(property.PropertyType.IsArray ? 
property.PropertyType.GetElementType() : property.PropertyType, out dk)) - throw Contracts.Except($"Property {name} is of unsupported type."); - break; - - default: - Contracts.Assert(false); - throw Contracts.ExceptNotSupp("Expected a FieldInfo or a PropertyInfo"); - } - - tlc.Type = dk; - - for (int indexLocal = 0; indexLocal < tlc.Source.Length; indexLocal++) - { - tlc.Source[indexLocal] = new TextLoaderRange - { - AllOther = sources[indexLocal].AllOther, - AutoEnd = sources[indexLocal].AutoEnd, - ForceVector = sources[indexLocal].ForceVector, - VariableEnd = sources[indexLocal].VariableEnd, - Max = sources[indexLocal].Max, - Min = sources[indexLocal].Min - }; - } - - Arguments.Column[index] = tlc; - } - - Arguments.HasHeader = useHeader; - Arguments.Separator = new[] { separator }; - Arguments.AllowQuoting = allowQuotedStrings; - Arguments.AllowSparse = supportSparse; - Arguments.TrimWhitespace = trimWhitespace; - - return this; - } - - /// - /// Try to map a System.Type to a corresponding DataKind value. - /// - private static bool TryGetDataKind(Type type, out DataKind kind) - { - Contracts.AssertValue(type); - - // REVIEW: Make this more efficient. Should we have a global dictionary? - if (type == typeof(sbyte)) - kind = DataKind.I1; - else if (type == typeof(byte) || type == typeof(char)) - kind = DataKind.U1; - else if (type == typeof(short)) - kind = DataKind.I2; - else if (type == typeof(ushort)) - kind = DataKind.U2; - else if (type == typeof(int)) - kind = DataKind.I4; - else if (type == typeof(uint)) - kind = DataKind.U4; - else if (type == typeof(long)) - kind = DataKind.I8; - else if (type == typeof(ulong)) - kind = DataKind.U8; - else if (type == typeof(Single)) - kind = DataKind.R4; - else if (type == typeof(Double)) - kind = DataKind.R8; - else if (type == typeof(ReadOnlyMemory) || type == typeof(string)) - kind = DataKind.TX; - else if (type == typeof(bool)) - kind = DataKind.BL; - else if (type == typeof(TimeSpan)) - kind = DataKind.TS; - else if (type == typeof(DateTime)) - kind = DataKind.DT; - else if (type == typeof(DateTimeOffset)) - kind = DataKind.DZ; - else if (type == typeof(RowId)) - kind = DataKind.UG; - else - { - kind = default(DataKind); - return false; - } - - return true; - } - } -} diff --git a/src/Microsoft.ML.Legacy/ILearningPipelineItem.cs b/src/Microsoft.ML.Legacy/ILearningPipelineItem.cs deleted file mode 100644 index 341b936420..0000000000 --- a/src/Microsoft.ML.Legacy/ILearningPipelineItem.cs +++ /dev/null @@ -1,57 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using Microsoft.ML.Data; -using Microsoft.ML.EntryPoints; - -namespace Microsoft.ML.Legacy -{ - /// - /// An item that can be added to the Learning Pipeline. - /// - [Obsolete] - public interface ILearningPipelineItem - { - ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment); - - /// - /// Returns the place holder for input IDataView object for the node in the execution graph. - /// - /// - Var GetInputData(); - } - - /// - /// A data loader that can be added to the Learning Pipeline. - /// - [Obsolete] - public interface ILearningPipelineLoader : ILearningPipelineItem - { - void SetInput(IHostEnvironment environment, Experiment experiment); - } - - /// - /// An item that can be added to the Learning Pipeline that can be trained and or return a IDataView. 
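
The TryGetDataKind helper above maps CLR types to DataKind through an if/else chain, and its own REVIEW note asks whether a global dictionary would be better. A minimal sketch of that dictionary variant, using the same DataKind values (the char, ReadOnlyMemory<char> and RowId mappings are omitted for brevity):

```csharp
using System;
using System.Collections.Generic;
using Microsoft.ML.Data;

static class DataKindMapSketch
{
    private static readonly Dictionary<Type, DataKind> Map = new Dictionary<Type, DataKind>
    {
        { typeof(sbyte), DataKind.I1 },  { typeof(byte), DataKind.U1 },
        { typeof(short), DataKind.I2 },  { typeof(ushort), DataKind.U2 },
        { typeof(int), DataKind.I4 },    { typeof(uint), DataKind.U4 },
        { typeof(long), DataKind.I8 },   { typeof(ulong), DataKind.U8 },
        { typeof(float), DataKind.R4 },  { typeof(double), DataKind.R8 },
        { typeof(string), DataKind.TX }, { typeof(bool), DataKind.BL },
        { typeof(TimeSpan), DataKind.TS }, { typeof(DateTime), DataKind.DT },
        { typeof(DateTimeOffset), DataKind.DZ },
    };

    public static bool TryGetDataKind(Type type, out DataKind kind)
        => Map.TryGetValue(type, out kind);
}
```
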
- /// This encapsulates an IDataView(input) and ITransformModel(output) object for a transform and - /// for a learner it will encapsulate IDataView(input) and IPredictorModel(output). - /// - [Obsolete] - public interface ILearningPipelineStep - { - } - - [Obsolete] - public interface ILearningPipelineDataStep : ILearningPipelineStep - { - Var Data { get; } - Var Model { get; } - } - - [Obsolete] - public interface ILearningPipelinePredictorStep : ILearningPipelineStep - { - Var Model { get; } - } -} \ No newline at end of file diff --git a/src/Microsoft.ML.Legacy/LearningPipeline.cs b/src/Microsoft.ML.Legacy/LearningPipeline.cs deleted file mode 100644 index a4bc9e0027..0000000000 --- a/src/Microsoft.ML.Legacy/LearningPipeline.cs +++ /dev/null @@ -1,279 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using System.Collections; -using System.Collections.Generic; -using System.Diagnostics; -using System.IO; -using Microsoft.ML.Data; -using Microsoft.ML.EntryPoints; - -namespace Microsoft.ML.Legacy -{ - [Obsolete] - public sealed class ScorerPipelineStep : ILearningPipelineDataStep - { - public ScorerPipelineStep(Var data, Var model) - { - Data = data; - Model = model; - } - - public Var Data { get; } - public Var Model { get; } - } - - /// - /// The class is used to define the steps needed to perform a desired machine learning task. - /// The steps are defined by adding a data loader (for example, ) followed by zero or more transforms (for example, ) - /// and at most one trainer/learner (for example, ) in the pipeline. - /// - /// - /// - /// - /// For example, - /// - /// var pipeline = new LearningPipeline(); - /// pipeline.Add(new TextLoader <SentimentData> (dataPath, separator: ",")); - /// pipeline.Add(new TextFeaturizer("Features", "SentimentText")); - /// pipeline.Add(new FastTreeBinaryClassifier()); - /// - /// var model = pipeline.Train<SentimentData, SentimentPrediction>(); - /// - /// - [Obsolete] - [DebuggerTypeProxy(typeof(LearningPipelineDebugProxy))] - public class LearningPipeline : ICollection - { - private List Items { get; } - private readonly int? _seed; - private readonly int _conc; - - /// - /// Construct an empty object. - /// - public LearningPipeline() - : this(conc: 0) - { - } - - /// - /// Construct an empty object. - /// - /// Specify seed for random generator - /// Specify concurrency factor (default value - autoselection) - internal LearningPipeline(int? seed = null, int conc = 0) - { - Items = new List(); - _seed = seed; - _conc = conc; - } - - /// - /// Get the count of ML components in the object - /// - public int Count => Items.Count; - public bool IsReadOnly => false; - - /// - /// Add a data loader, transform or trainer into the pipeline. - /// Possible data loader(s), transforms and trainers options are - /// - /// Data Loader: - /// - /// etc. - /// - /// - /// Transforms: - /// , - /// - /// , - /// , - /// , - /// , - /// etc. - /// - /// - /// Trainers: - /// , - /// , - /// , - /// , - /// etc. - /// - /// For a complete list of transforms and trainers, please see "Microsoft.ML.Transforms" and "Microsoft.ML.Trainers" namespaces. - /// - /// Any ML component (data loader, transform or trainer) defined as . - public void Add(ILearningPipelineItem item) => Items.Add(item); - - /// - /// Add a data loader, transform or trainer into the pipeline. 
- /// - /// Any ML component (data loader, transform or trainer) defined as . - /// Pipeline with added item - public LearningPipeline Append(ILearningPipelineItem item) - { - Add(item); - return this; - } - /// - /// Remove all the loaders/transforms/trainers from the pipeline. - /// - public void Clear() => Items.Clear(); - - /// - /// Check if a specific loader/transform/trainer is in the pipeline? - /// - /// Any ML component (data loader, transform or trainer) defined as . - /// true if item is found in the pipeline; otherwise, false. - public bool Contains(ILearningPipelineItem item) => Items.Contains(item); - - /// - /// Copy the pipeline items into an array. - /// - /// The one-dimensional Array that is the destination of the elements copied from. - /// The zero-based index in at which copying begins. - public void CopyTo(ILearningPipelineItem[] array, int arrayIndex) => Items.CopyTo(array, arrayIndex); - public IEnumerator GetEnumerator() => Items.GetEnumerator(); - - /// - /// Remove an item from the pipeline. - /// - /// to remove. - /// true if item was removed from the pipeline; otherwise, false. - public bool Remove(ILearningPipelineItem item) => Items.Remove(item); - IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); - - /// - /// Train the model using the ML components in the pipeline. - /// - /// Type of data instances the model will be trained on. It's a custom type defined by the user according to the structure of data. - /// - /// Please see https://www.microsoft.com/net/learn/apps/machine-learning-and-ai/ml-dotnet/get-started/windows for more details on input type. - /// - /// Ouput type. The prediction will be return based on this type. - /// Please see https://www.microsoft.com/net/learn/apps/machine-learning-and-ai/ml-dotnet/get-started/windows for more details on output type. - /// - /// PredictionModel object. This is the model object used for prediction on new instances. 
- public PredictionModel Train() - where TInput : class - where TOutput : class, new() - { - var environment = new MLContext(seed: _seed, conc: _conc); - Experiment experiment = environment.CreateExperiment(); - ILearningPipelineStep step = null; - List loaders = new List(); - List> transformModels = new List>(); - Var lastTransformModel = null; - - foreach (ILearningPipelineItem currentItem in this) - { - if (currentItem is ILearningPipelineLoader loader) - loaders.Add(loader); - - step = currentItem.ApplyStep(step, experiment); - if (step is ILearningPipelineDataStep dataStep && dataStep.Model != null) - transformModels.Add(dataStep.Model); - else if (step is ILearningPipelinePredictorStep predictorDataStep) - { - if (lastTransformModel != null) - transformModels.Insert(0, lastTransformModel); - - Var predictorModel; - if (transformModels.Count != 0) - { - var localModelInput = new Transforms.ManyHeterogeneousModelCombiner - { - PredictorModel = predictorDataStep.Model, - TransformModels = new ArrayVar(transformModels.ToArray()) - }; - var localModelOutput = experiment.Add(localModelInput); - predictorModel = localModelOutput.PredictorModel; - } - else - predictorModel = predictorDataStep.Model; - - var scorer = new Transforms.Scorer - { - PredictorModel = predictorModel - }; - - var scorerOutput = experiment.Add(scorer); - lastTransformModel = scorerOutput.ScoringTransform; - step = new ScorerPipelineStep(scorerOutput.ScoredData, scorerOutput.ScoringTransform); - transformModels.Clear(); - } - } - - if (transformModels.Count > 0) - { - if (lastTransformModel != null) - transformModels.Insert(0, lastTransformModel); - - var modelInput = new Transforms.ModelCombiner - { - Models = new ArrayVar(transformModels.ToArray()) - }; - - var modelOutput = experiment.Add(modelInput); - lastTransformModel = modelOutput.OutputModel; - } - - experiment.Compile(); - foreach (ILearningPipelineLoader loader in loaders) - { - loader.SetInput(environment, experiment); - } - experiment.Run(); - - TransformModel model = experiment.GetOutput(lastTransformModel); - BatchPredictionEngine predictor; - using (var memoryStream = new MemoryStream()) - { - model.Save(environment, memoryStream); - - memoryStream.Position = 0; - - predictor = environment.CreateBatchPredictionEngine(memoryStream); - - return new PredictionModel(predictor, memoryStream); - } - } - - /// - /// Executes a pipeline and returns the resulting data. - /// - /// - /// The IDataView that was returned by the pipeline. 
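
The Train method removed above compiled the Legacy pipeline into an entry-point experiment, combined the transform and predictor models, and wrapped the result in a batch prediction engine. For context, here is a minimal usage sketch of that removed flow, based on the class-level example earlier in this file; SentimentData and SentimentPrediction are user-defined POCO types, the file path is a placeholder, and Predict is assumed to be the single-example scoring method on the returned model.

```csharp
// Placeholder path and user-defined POCO types (SentimentData, SentimentPrediction).
var dataPath = "sentiment.tsv";

var pipeline = new LearningPipeline();
pipeline.Add(new TextLoader(dataPath).CreateFrom<SentimentData>(useHeader: true));
pipeline.Add(new TextFeaturizer("Features", "SentimentText"));
pipeline.Add(new FastTreeBinaryClassifier());

PredictionModel<SentimentData, SentimentPrediction> model =
    pipeline.Train<SentimentData, SentimentPrediction>();

// Predict(...) is assumed to score a single example with the trained model.
SentimentPrediction prediction =
    model.Predict(new SentimentData { SentimentText = "Great product, would buy again." });
```
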
- /// - internal IDataView Execute(IHostEnvironment environment) - { - Experiment experiment = environment.CreateExperiment(); - ILearningPipelineStep step = null; - List loaders = new List(); - foreach (ILearningPipelineItem currentItem in this) - { - if (currentItem is ILearningPipelineLoader loader) - loaders.Add(loader); - - step = currentItem.ApplyStep(step, experiment); - } - - if (!(step is ILearningPipelineDataStep endDataStep)) - { - throw new InvalidOperationException($"{nameof(LearningPipeline)}.{nameof(Execute)} must have a Data step as the last step."); - } - - experiment.Compile(); - foreach (ILearningPipelineLoader loader in loaders) - { - loader.SetInput(environment, experiment); - } - experiment.Run(); - - return experiment.GetOutput(endDataStep.Data); - } - } -} diff --git a/src/Microsoft.ML.Legacy/LearningPipelineDebugProxy.cs b/src/Microsoft.ML.Legacy/LearningPipelineDebugProxy.cs deleted file mode 100644 index 150c1634f9..0000000000 --- a/src/Microsoft.ML.Legacy/LearningPipelineDebugProxy.cs +++ /dev/null @@ -1,245 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using System.Collections.Generic; -using System.Diagnostics; -using System.Linq; -using System.Text; -using Microsoft.ML.Data; -using Microsoft.ML.Legacy.Transforms; - -namespace Microsoft.ML.Legacy -{ - /// - /// The debug proxy class for a LearningPipeline. - /// Displays the current columns and values in the debugger Watch window. - /// - [Obsolete] - internal sealed class LearningPipelineDebugProxy - { - // load more rows than we display in order for transforms like CategoricalOneHotVectorizer - // to see more rows, and get a more accurate cardinality of the column. - private const int MaxLoaderRows = 100; - private const int MaxDisplayRows = 10; - private const int MaxSlotNamesToDisplay = 100; - - private readonly LearningPipeline _pipeline; - private readonly IHostEnvironment _environment; - private IDataView _preview; - private Exception _pipelineExecutionException; - private PipelineItemDebugColumn[] _columns; - private PipelineItemDebugRow[] _rows; - - public LearningPipelineDebugProxy(LearningPipeline pipeline) - { - if (pipeline == null) - throw new ArgumentNullException(nameof(pipeline)); - - _pipeline = new LearningPipeline(); - - // use a ConcurrencyFactor of 1 so other threads don't need to run in the debugger - _environment = new MLContext(conc: 1); - - foreach (ILearningPipelineItem item in pipeline) - { - _pipeline.Add(item); - - if (item is ILearningPipelineLoader loaderItem) - { - // add a take filter to any loaders, so it returns in a reasonable - // amount of time - _pipeline.Add(new RowTakeFilter() { Count = MaxLoaderRows }); - } - } - } - - /// - /// Gets the column information of the pipeline. 
- /// - public PipelineItemDebugColumn[] Columns - { - get - { - if (_columns == null) - { - _columns = BuildColumns(); - } - return _columns; - } - } - - private PipelineItemDebugColumn[] BuildColumns() - { - IDataView dataView = ExecutePipeline(); - - var colIndices = GetColIndices(dataView); - var colCount = colIndices.Count; - - PipelineItemDebugColumn[] result = new PipelineItemDebugColumn[colCount]; - - for (int i = 0; i < colCount; i++) - { - var colIndex = colIndices[i]; - result[i] = new PipelineItemDebugColumn() - { - Name = dataView.Schema[colIndex].Name, - Type = dataView.Schema[colIndex].Type.ToString() - }; - - if (dataView.Schema[colIndex].Type.IsVector) - { - var n = dataView.Schema[colIndex].Type.VectorSize; - if (dataView.Schema[colIndex].HasSlotNames(n)) - { - var slots = default(VBuffer>); - dataView.Schema[colIndex].Metadata.GetValue(MetadataUtils.Kinds.SlotNames, ref slots); - - bool appendEllipse = false; - IEnumerable> slotNames = slots.Items(true).Select(x => x.Value); - if (slots.Length > MaxSlotNamesToDisplay) - { - appendEllipse = true; - slotNames = slotNames.Take(MaxSlotNamesToDisplay); - } - - result[i].SlotNames = string.Join(",", slotNames); - - if (appendEllipse) - { - result[i].SlotNames += ",..."; - } - } - } - } - - return result; - } - - /// - /// Gets the row information of the pipeline. - /// - public PipelineItemDebugRow[] Rows - { - get - { - if (_rows == null) - { - _rows = BuildRows(); - } - return _rows; - } - } - - private IDataView ExecutePipeline() - { - if (_preview == null) - { - if (_pipeline != null) - { - try - { - _preview = _pipeline.Execute(_environment); - } - catch (Exception e) - { - _pipelineExecutionException = e; - var builder = new SchemaBuilder(); - builder.AddColumn("Blank", TextType.Instance); - _preview = new EmptyDataView(_environment, builder.GetSchema()); - } - } - } - return _preview; - } - - private PipelineItemDebugRow[] BuildRows() - { - PipelineItemDebugRow[] result = new PipelineItemDebugRow[MaxDisplayRows]; - - int i = 0; - IDataView pipelineResult = ExecutePipeline(); - if (_pipelineExecutionException != null) - { - result[0] = new PipelineItemDebugRow() - { - Values = _pipelineExecutionException.ToString() - }; - return result; - } - - StringBuilder valuesBuilder = new StringBuilder(); - using (var cursor = pipelineResult.GetRowCursor(c => true)) - { - var colIndices = GetColIndices(pipelineResult); - var colCount = colIndices.Count; - - var getters = DataViewUtils.PopulateGetterArray(cursor, colIndices); - - var row = new ReadOnlyMemory[colCount]; - while (cursor.MoveNext() && i < MaxDisplayRows) - { - for (int column = 0; column < colCount; column++) - { - if (column != 0) - { - valuesBuilder.Append(" | "); - } - - getters[column](ref row[column]); - valuesBuilder.Append(row[column]); - } - - result[i] = new PipelineItemDebugRow() - { - Values = valuesBuilder.ToString() - }; - - valuesBuilder.Clear(); - i++; - } - } - - return result; - } - - private static List GetColIndices(IDataView dataView) - { - int totalColCount = dataView.Schema.Count; - // getting distinct columns - HashSet columnNames = new HashSet(); - var colIndices = new List(); - for (int i = totalColCount - 1; i >= 0; i--) - { - var name = dataView.Schema[i].Name; - if (columnNames.Add(name)) - colIndices.Add(i); - } - colIndices.Reverse(); - - return colIndices; - } - } - - [Obsolete] - [DebuggerDisplay("{Name} {Type}{SlotNames}")] - internal class PipelineItemDebugColumn - { - public string Name { get; set; } - public string Type { get; set; } 
- public string SlotNames { get; set; } - - public PipelineItemDebugColumn() - { - SlotNames = string.Empty; - } - } - - [Obsolete] - [DebuggerDisplay("{Values}")] - internal class PipelineItemDebugRow - { - public string Values { get; set; } - } -} diff --git a/src/Microsoft.ML.Legacy/Microsoft.ML.Legacy.csproj b/src/Microsoft.ML.Legacy/Microsoft.ML.Legacy.csproj deleted file mode 100644 index 4622d2827c..0000000000 --- a/src/Microsoft.ML.Legacy/Microsoft.ML.Legacy.csproj +++ /dev/null @@ -1,27 +0,0 @@ - - - - netstandard2.0 - Microsoft.ML - CORECLR - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/src/Microsoft.ML.Legacy/Models/BinaryClassificationEvaluator.cs b/src/Microsoft.ML.Legacy/Models/BinaryClassificationEvaluator.cs deleted file mode 100644 index bde112057d..0000000000 --- a/src/Microsoft.ML.Legacy/Models/BinaryClassificationEvaluator.cs +++ /dev/null @@ -1,74 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using Microsoft.ML.Data; -using Microsoft.ML.Legacy.Transforms; - -namespace Microsoft.ML.Legacy.Models -{ - public sealed partial class BinaryClassificationEvaluator - { - /// - /// Computes the quality metrics for the PredictionModel using the specified data set. - /// - /// - /// The trained PredictionModel to be evaluated. - /// - /// - /// The test data that will be predicted and used to evaluate the model. - /// - /// - /// A BinaryClassificationMetrics instance that describes how well the model performed against the test data. - /// - public BinaryClassificationMetrics Evaluate(PredictionModel model, ILearningPipelineLoader testData) - { - var environment = new MLContext(); - environment.CheckValue(model, nameof(model)); - environment.CheckValue(testData, nameof(testData)); - - Experiment experiment = environment.CreateExperiment(); - - ILearningPipelineStep testDataStep = testData.ApplyStep(previousStep: null, experiment); - if (!(testDataStep is ILearningPipelineDataStep testDataOutput)) - { - throw environment.Except($"The {nameof(ILearningPipelineLoader)} did not return a {nameof(ILearningPipelineDataStep)} from ApplyStep."); - } - - var datasetScorer = new DatasetTransformScorer - { - Data = testDataOutput.Data - }; - DatasetTransformScorer.Output scoreOutput = experiment.Add(datasetScorer); - - Data = scoreOutput.ScoredData; - Output evaluteOutput = experiment.Add(this); - - experiment.Compile(); - - experiment.SetInput(datasetScorer.TransformModel, model.PredictorModel); - testData.SetInput(environment, experiment); - - experiment.Run(); - - IDataView overallMetrics = experiment.GetOutput(evaluteOutput.OverallMetrics); - if (overallMetrics == null) - { - throw environment.Except($"Could not find OverallMetrics in the results returned in {nameof(BinaryClassificationEvaluator)} Evaluate."); - } - - IDataView confusionMatrix = experiment.GetOutput(evaluteOutput.ConfusionMatrix); - if (confusionMatrix == null) - { - throw environment.Except($"Could not find ConfusionMatrix in the results returned in {nameof(BinaryClassificationEvaluator)} Evaluate."); - } - - var metric = BinaryClassificationMetrics.FromMetrics(environment, overallMetrics, confusionMatrix); - - if (metric.Count != 1) - throw environment.Except($"Exactly one metric set was expected but found {metric.Count} metrics"); - - return metric[0]; - } - } -} diff --git 
a/src/Microsoft.ML.Legacy/Models/BinaryClassificationMetrics.cs b/src/Microsoft.ML.Legacy/Models/BinaryClassificationMetrics.cs deleted file mode 100644 index ac8aa0bfbd..0000000000 --- a/src/Microsoft.ML.Legacy/Models/BinaryClassificationMetrics.cs +++ /dev/null @@ -1,216 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using System.Collections.Generic; -using Microsoft.ML.Data; -using static Microsoft.ML.Data.MetricKinds; - -namespace Microsoft.ML.Legacy.Models -{ - /// - /// This class contains the overall metrics computed by binary classification evaluators. - /// - [Obsolete] - public sealed class BinaryClassificationMetrics - { - private BinaryClassificationMetrics() - { - } - - internal static List FromMetrics(IHostEnvironment env, IDataView overallMetrics, IDataView confusionMatrix, int confusionMatriceStartIndex = 0) - { - Contracts.AssertValue(env); - env.AssertValue(overallMetrics); - env.AssertValue(confusionMatrix); - - var metricsEnumerable = overallMetrics.AsEnumerable(env, true, ignoreMissingColumns: true); - if (!metricsEnumerable.GetEnumerator().MoveNext()) - { - throw env.Except("The overall RegressionMetrics didn't have any rows."); - } - - List metrics = new List(); - var confusionMatrices = ConfusionMatrix.Create(env, confusionMatrix).GetEnumerator(); - - int index = 0; - foreach (var metric in metricsEnumerable) - { - - if (index++ >= confusionMatriceStartIndex && !confusionMatrices.MoveNext()) - { - throw env.Except("Confusion matrices didn't have enough matrices."); - } - - metrics.Add( - new BinaryClassificationMetrics() - { - Auc = metric.Auc, - Accuracy = metric.Accuracy, - PositivePrecision = metric.PositivePrecision, - PositiveRecall = metric.PositiveRecall, - NegativePrecision = metric.NegativePrecision, - NegativeRecall = metric.NegativeRecall, - LogLoss = metric.LogLoss, - LogLossReduction = metric.LogLossReduction, - Entropy = metric.Entropy, - F1Score = metric.F1Score, - Auprc = metric.Auprc, - RowTag = metric.RowTag, - ConfusionMatrix = confusionMatrices.Current, - }); - - } - - return metrics; - } - - /// - /// Gets the area under the ROC curve. - /// - /// - /// The area under the ROC curve is equal to the probability that the classifier ranks - /// a randomly chosen positive instance higher than a randomly chosen negative one - /// (assuming 'positive' ranks higher than 'negative'). - /// - public double Auc { get; private set; } - - /// - /// Gets the accuracy of a classifier which is the proportion of correct predictions in the test set. - /// - public double Accuracy { get; private set; } - - /// - /// Gets the positive precision of a classifier which is the proportion of correctly predicted - /// positive instances among all the positive predictions (i.e., the number of positive instances - /// predicted as positive, divided by the total number of instances predicted as positive). - /// - public double PositivePrecision { get; private set; } - - /// - /// Gets the positive recall of a classifier which is the proportion of correctly predicted - /// positive instances among all the positive instances (i.e., the number of positive instances - /// predicted as positive, divided by the total number of positive instances). 
- /// - public double PositiveRecall { get; private set; } - - /// - /// Gets the negative precision of a classifier which is the proportion of correctly predicted - /// negative instances among all the negative predictions (i.e., the number of negative instances - /// predicted as negative, divided by the total number of instances predicted as negative). - /// - public double NegativePrecision { get; private set; } - - /// - /// Gets the negative recall of a classifier which is the proportion of correctly predicted - /// negative instances among all the negative instances (i.e., the number of negative instances - /// predicted as negative, divided by the total number of negative instances). - /// - public double NegativeRecall { get; private set; } - - /// - /// Gets the log-loss of the classifier. - /// - /// - /// The log-loss metric, is computed as follows: - /// LL = - (1/m) * sum( log(p[i])) - /// where m is the number of instances in the test set. - /// p[i] is the probability returned by the classifier if the instance belongs to class 1, - /// and 1 minus the probability returned by the classifier if the instance belongs to class 0. - /// - public double LogLoss { get; private set; } - - /// - /// Gets the log-loss reduction (also known as relative log-loss, or reduction in information gain - RIG) - /// of the classifier. - /// - /// - /// The log-loss reduction is scaled relative to a classifier that predicts the prior for every example: - /// (LL(prior) - LL(classifier)) / LL(prior) - /// This metric can be interpreted as the advantage of the classifier over a random prediction. - /// For example, if the RIG equals 20, it can be interpreted as "the probability of a correct prediction is - /// 20% better than random guessing". - /// - public double LogLossReduction { get; private set; } - - /// - /// Gets the test-set entropy (prior Log-Loss/instance) of the classifier. - /// - public double Entropy { get; private set; } - - /// - /// Gets the F1 score of the classifier. - /// - /// - /// F1 score is the harmonic mean of precision and recall: 2 * precision * recall / (precision + recall). - /// - public double F1Score { get; private set; } - - /// - /// Gets the area under the precision/recall curve of the classifier. - /// - /// - /// The area under the precision/recall curve is a single number summary of the information in the - /// precision/recall curve. It is increasingly used in the machine learning community, particularly - /// for imbalanced datasets where one class is observed more frequently than the other. On these - /// datasets, AUPRC can highlight performance differences that are lost with AUC. - /// - public double Auprc { get; private set; } - - /// - /// Gets the confusion matrix, or error matrix, of the classifier. - /// - public ConfusionMatrix ConfusionMatrix { get; private set; } - - /// - /// For cross-validation, this is equal to "Fold N" for per-fold metric rows, "Overall" for the average metrics and "STD" for standard deviation. - /// For non-CV scenarios, this is equal to null - /// - public string RowTag { get; private set; } - - /// - /// This class contains the public fields necessary to deserialize from IDataView. 
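The metrics documented above (accuracy, positive/negative precision and recall, F1, log-loss, log-loss reduction) reduce to a few sums over labels and predicted probabilities. A self-contained sketch of those formulas on toy data; the sample arrays and the 0.5 decision threshold are assumptions for illustration, not part of the deleted API:

```csharp
using System;
using System.Linq;

internal static class BinaryMetricsSketch
{
    public static void Main()
    {
        bool[] labels = { true, true, false, true, false };
        double[] probs = { 0.9, 0.7, 0.4, 0.3, 0.1 }; // P(label == true)
        bool[] preds = probs.Select(p => p >= 0.5).ToArray();

        int tp = 0, fp = 0, tn = 0, fn = 0;
        for (int i = 0; i < labels.Length; i++)
        {
            if (preds[i] && labels[i]) tp++;
            else if (preds[i] && !labels[i]) fp++;
            else if (!preds[i] && !labels[i]) tn++;
            else fn++;
        }

        double accuracy = (tp + tn) / (double)labels.Length;
        double precision = tp / (double)(tp + fp);           // PositivePrecision
        double recall = tp / (double)(tp + fn);               // PositiveRecall
        double f1 = 2 * precision * recall / (precision + recall);

        // LL = -(1/m) * sum(log p_i), where p_i is the probability assigned to the true class.
        double logLoss = -labels.Zip(probs, (y, p) => Math.Log(y ? p : 1 - p)).Average();

        // Log-loss reduction relative to a classifier that always predicts the label prior,
        // matching the (LL(prior) - LL(classifier)) / LL(prior) formula quoted above.
        double prior = labels.Count(y => y) / (double)labels.Length;
        double priorLogLoss = -labels.Select(y => Math.Log(y ? prior : 1 - prior)).Average();
        double logLossReduction = (priorLogLoss - logLoss) / priorLogLoss;

        Console.WriteLine($"acc={accuracy:F3} prec={precision:F3} rec={recall:F3} f1={f1:F3}");
        Console.WriteLine($"logLoss={logLoss:F3} llReduction={logLossReduction:F3}");
    }
}
```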
- /// - private sealed class SerializationClass - { -#pragma warning disable 649 // never assigned - [ColumnName(BinaryClassifierEvaluator.Auc)] - public Double Auc; - - [ColumnName(BinaryClassifierEvaluator.Accuracy)] - public Double Accuracy; - - [ColumnName(BinaryClassifierEvaluator.PosPrecName)] - public Double PositivePrecision; - - [ColumnName(BinaryClassifierEvaluator.PosRecallName)] - public Double PositiveRecall; - - [ColumnName(BinaryClassifierEvaluator.NegPrecName)] - public Double NegativePrecision; - - [ColumnName(BinaryClassifierEvaluator.NegRecallName)] - public Double NegativeRecall; - - [ColumnName(BinaryClassifierEvaluator.LogLoss)] - public Double LogLoss; - - [ColumnName(BinaryClassifierEvaluator.LogLossReduction)] - public Double LogLossReduction; - - [ColumnName(BinaryClassifierEvaluator.Entropy)] - public Double Entropy; - - [ColumnName(BinaryClassifierEvaluator.F1)] - public Double F1Score; - - [ColumnName(BinaryClassifierEvaluator.AuPrc)] - public Double Auprc; - - [ColumnName(ColumnNames.FoldIndex)] - public string RowTag; -#pragma warning restore 649 // never assigned - } - } -} diff --git a/src/Microsoft.ML.Legacy/Models/ClassificationEvaluator.cs b/src/Microsoft.ML.Legacy/Models/ClassificationEvaluator.cs deleted file mode 100644 index 97e034d4b5..0000000000 --- a/src/Microsoft.ML.Legacy/Models/ClassificationEvaluator.cs +++ /dev/null @@ -1,75 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using Microsoft.ML.Data; -using Microsoft.ML.Legacy.Transforms; - -namespace Microsoft.ML.Legacy.Models -{ - public sealed partial class ClassificationEvaluator - { - /// - /// Computes the quality metrics for the multi-class classification PredictionModel - /// using the specified data set. - /// - /// - /// The trained multi-class classification PredictionModel to be evaluated. - /// - /// - /// The test data that will be predicted and used to evaluate the model. - /// - /// - /// A ClassificationMetrics instance that describes how well the model performed against the test data. 
- /// - public ClassificationMetrics Evaluate(PredictionModel model, ILearningPipelineLoader testData) - { - var environment = new MLContext(); - environment.CheckValue(model, nameof(model)); - environment.CheckValue(testData, nameof(testData)); - - Experiment experiment = environment.CreateExperiment(); - - ILearningPipelineStep testDataStep = testData.ApplyStep(previousStep: null, experiment); - if (!(testDataStep is ILearningPipelineDataStep testDataOutput)) - { - throw environment.Except($"The {nameof(ILearningPipelineLoader)} did not return a {nameof(ILearningPipelineDataStep)} from ApplyStep."); - } - - var datasetScorer = new DatasetTransformScorer - { - Data = testDataOutput.Data, - }; - DatasetTransformScorer.Output scoreOutput = experiment.Add(datasetScorer); - - Data = scoreOutput.ScoredData; - Output evaluteOutput = experiment.Add(this); - - experiment.Compile(); - - experiment.SetInput(datasetScorer.TransformModel, model.PredictorModel); - testData.SetInput(environment, experiment); - - experiment.Run(); - - IDataView overallMetrics = experiment.GetOutput(evaluteOutput.OverallMetrics); - if (overallMetrics == null) - { - throw environment.Except($"Could not find OverallMetrics in the results returned in {nameof(ClassificationEvaluator)} Evaluate."); - } - - IDataView confusionMatrix = experiment.GetOutput(evaluteOutput.ConfusionMatrix); - if (confusionMatrix == null) - { - throw environment.Except($"Could not find ConfusionMatrix in the results returned in {nameof(ClassificationEvaluator)} Evaluate."); - } - - var metric = ClassificationMetrics.FromMetrics(environment, overallMetrics, confusionMatrix); - - if (metric.Count != 1) - throw environment.Except($"Exactly one metric set was expected but found {metric.Count} metrics"); - - return metric[0]; - } - } -} diff --git a/src/Microsoft.ML.Legacy/Models/ClassificationMetrics.cs b/src/Microsoft.ML.Legacy/Models/ClassificationMetrics.cs deleted file mode 100644 index cf594a354c..0000000000 --- a/src/Microsoft.ML.Legacy/Models/ClassificationMetrics.cs +++ /dev/null @@ -1,172 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using System.Collections.Generic; -using Microsoft.ML.Data; -using static Microsoft.ML.Data.MetricKinds; - -namespace Microsoft.ML.Legacy.Models -{ - /// - /// This class contains the overall metrics computed by multi-class classification evaluators. 
- /// - [Obsolete] - public sealed class ClassificationMetrics - { - private ClassificationMetrics() - { - } - - internal static List FromMetrics(IHostEnvironment env, IDataView overallMetrics, IDataView confusionMatrix, - int confusionMatriceStartIndex = 0) - { - Contracts.AssertValue(env); - env.AssertValue(overallMetrics); - env.AssertValue(confusionMatrix); - - var metricsEnumerable = overallMetrics.AsEnumerable(env, true, ignoreMissingColumns: true); - if (!metricsEnumerable.GetEnumerator().MoveNext()) - { - throw env.Except("The overall RegressionMetrics didn't have any rows."); - } - - List metrics = new List(); - var confusionMatrices = ConfusionMatrix.Create(env, confusionMatrix).GetEnumerator(); - - int index = 0; - foreach (var metric in metricsEnumerable) - { - if (index++ >= confusionMatriceStartIndex && !confusionMatrices.MoveNext()) - { - throw env.Except("Confusion matrices didn't have enough matrices."); - } - - metrics.Add( - new ClassificationMetrics() - { - AccuracyMicro = metric.AccuracyMicro, - AccuracyMacro = metric.AccuracyMacro, - LogLoss = metric.LogLoss, - LogLossReduction = metric.LogLossReduction, - TopKAccuracy = metric.TopKAccuracy, - PerClassLogLoss = metric.PerClassLogLoss, - ConfusionMatrix = confusionMatrices.Current, - RowTag = metric.RowTag, - }); - - } - - return metrics; - } - - /// - /// Gets the micro-average accuracy of the model. - /// - /// - /// The micro-average is the fraction of instances predicted correctly. - /// - /// The micro-average metric weighs each class according to the number of instances that belong - /// to it in the dataset. - /// - public double AccuracyMicro { get; private set; } - - /// - /// Gets the macro-average accuracy of the model. - /// - /// - /// The macro-average is computed by taking the average over all the classes of the fraction - /// of correct predictions in this class (the number of correctly predicted instances in the class, - /// divided by the total number of instances in the class). - /// - /// The macro-average metric gives the same weight to each class, no matter how many instances from - /// that class the dataset contains. - /// - public double AccuracyMacro { get; private set; } - - /// - /// Gets the average log-loss of the classifier. - /// - /// - /// The log-loss metric, is computed as follows: - /// LL = - (1/m) * sum( log(p[i])) - /// where m is the number of instances in the test set. - /// p[i] is the probability returned by the classifier if the instance belongs to class 1, - /// and 1 minus the probability returned by the classifier if the instance belongs to class 0. - /// - public double LogLoss { get; private set; } - - /// - /// Gets the log-loss reduction (also known as relative log-loss, or reduction in information gain - RIG) - /// of the classifier. - /// - /// - /// The log-loss reduction is scaled relative to a classifier that predicts the prior for every example: - /// (LL(prior) - LL(classifier)) / LL(prior) - /// This metric can be interpreted as the advantage of the classifier over a random prediction. - /// For example, if the RIG equals 20, it can be interpreted as "the probability of a correct prediction is - /// 20% better than random guessing". - /// - public double LogLossReduction { get; private set; } - - /// - /// If was specified on the - /// evaluator to be k, then TopKAccuracy is the relative number of examples where - /// the true label is one of the top k predicted labels by the predictor. 
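The micro- versus macro-average distinction described above is easiest to see on a small imbalanced example: micro-accuracy weighs every instance equally, macro-accuracy weighs every class equally. A short sketch with made-up three-class data:

```csharp
using System;
using System.Linq;

internal static class MultiClassAccuracySketch
{
    public static void Main()
    {
        // True and predicted class indices for a small, imbalanced 3-class example.
        int[] truth = { 0, 0, 0, 0, 1, 1, 2 };
        int[] pred  = { 0, 0, 0, 1, 1, 0, 2 };

        // Micro-average: fraction of all instances predicted correctly.
        double micro = truth.Zip(pred, (t, p) => t == p ? 1.0 : 0.0).Average();

        // Macro-average: mean of per-class accuracies, so every class gets equal weight
        // no matter how many instances it has.
        double macro = truth.Distinct()
            .Select(c =>
            {
                var idx = Enumerable.Range(0, truth.Length).Where(i => truth[i] == c).ToArray();
                return idx.Count(i => pred[i] == c) / (double)idx.Length;
            })
            .Average();

        Console.WriteLine($"micro={micro:F3} macro={macro:F3}"); // micro ~0.714, macro 0.750
    }
}
```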
- /// - public double TopKAccuracy { get; private set; } - - /// - /// Gets the log-loss of the classifier for each class. - /// - /// - /// The log-loss metric, is computed as follows: - /// LL = - (1/m) * sum( log(p[i])) - /// where m is the number of instances in the test set. - /// p[i] is the probability returned by the classifier if the instance belongs to the class, - /// and 1 minus the probability returned by the classifier if the instance does not belong to the class. - /// - public double[] PerClassLogLoss { get; private set; } - - /// - /// For cross-validation, this is equal to "Fold N" for per-fold metric rows, "Overall" for the average metrics and "STD" for standard deviation. - /// For non-CV scenarios, this is equal to null - /// - public string RowTag { get; private set; } - - /// - /// Gets the confusion matrix, or error matrix, of the classifier. - /// - public ConfusionMatrix ConfusionMatrix { get; private set; } - - /// - /// This class contains the public fields necessary to deserialize from IDataView. - /// - private sealed class SerializationClass - { -#pragma warning disable 649 // never assigned - [ColumnName(MultiClassClassifierEvaluator.AccuracyMicro)] - public double AccuracyMicro; - - [ColumnName(MultiClassClassifierEvaluator.AccuracyMacro)] - public double AccuracyMacro; - - [ColumnName(MultiClassClassifierEvaluator.LogLoss)] - public double LogLoss; - - [ColumnName(MultiClassClassifierEvaluator.LogLossReduction)] - public double LogLossReduction; - - [ColumnName(MultiClassClassifierEvaluator.TopKAccuracy)] - public double TopKAccuracy; - - [ColumnName(MultiClassClassifierEvaluator.PerClassLogLoss)] - public double[] PerClassLogLoss; - - [ColumnName(ColumnNames.FoldIndex)] - public string RowTag; -#pragma warning restore 649 // never assigned - } - } -} diff --git a/src/Microsoft.ML.Legacy/Models/ClusterEvaluator.cs b/src/Microsoft.ML.Legacy/Models/ClusterEvaluator.cs deleted file mode 100644 index 92ae134872..0000000000 --- a/src/Microsoft.ML.Legacy/Models/ClusterEvaluator.cs +++ /dev/null @@ -1,68 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using Microsoft.ML.Data; -using Microsoft.ML.Legacy.Transforms; - -namespace Microsoft.ML.Legacy.Models -{ - public sealed partial class ClusterEvaluator - { - /// - /// Computes the quality metrics for the PredictionModel using the specified data set. - /// - /// - /// The trained PredictionModel to be evaluated. - /// - /// - /// The test data that will be predicted and used to evaluate the model. - /// - /// - /// A ClusterMetrics instance that describes how well the model performed against the test data. 
- /// - public ClusterMetrics Evaluate(PredictionModel model, ILearningPipelineLoader testData) - { - var environment = new MLContext(); - environment.CheckValue(model, nameof(model)); - environment.CheckValue(testData, nameof(testData)); - - Experiment experiment = environment.CreateExperiment(); - - ILearningPipelineStep testDataStep = testData.ApplyStep(previousStep: null, experiment); - if (!(testDataStep is ILearningPipelineDataStep testDataOutput)) - { - throw environment.Except($"The {nameof(ILearningPipelineLoader)} did not return a {nameof(ILearningPipelineDataStep)} from ApplyStep."); - } - - var datasetScorer = new DatasetTransformScorer - { - Data = testDataOutput.Data, - }; - DatasetTransformScorer.Output scoreOutput = experiment.Add(datasetScorer); - - Data = scoreOutput.ScoredData; - Output evaluteOutput = experiment.Add(this); - - experiment.Compile(); - - experiment.SetInput(datasetScorer.TransformModel, model.PredictorModel); - testData.SetInput(environment, experiment); - - experiment.Run(); - - IDataView overallMetrics = experiment.GetOutput(evaluteOutput.OverallMetrics); - - if (overallMetrics == null) - { - throw environment.Except($"Could not find OverallMetrics in the results returned in {nameof(ClusterEvaluator)} Evaluate."); - } - - var metric = ClusterMetrics.FromOverallMetrics(environment, overallMetrics); - - Contracts.Assert(metric.Count == 1, $"Exactly one metric set was expected but found {metric.Count} metrics"); - - return metric[0]; - } - } -} diff --git a/src/Microsoft.ML.Legacy/Models/ClusterMetrics.cs b/src/Microsoft.ML.Legacy/Models/ClusterMetrics.cs deleted file mode 100644 index b9432ad646..0000000000 --- a/src/Microsoft.ML.Legacy/Models/ClusterMetrics.cs +++ /dev/null @@ -1,103 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using System.Collections.Generic; -using Microsoft.ML.Data; -using static Microsoft.ML.Data.MetricKinds; - -namespace Microsoft.ML.Legacy.Models -{ - /// - /// This class contains the overall metrics computed by cluster evaluators. - /// - [Obsolete] - public sealed class ClusterMetrics - { - private ClusterMetrics() - { - } - - internal static List FromOverallMetrics(IHostEnvironment env, IDataView overallMetrics) - { - Contracts.AssertValue(env); - env.AssertValue(overallMetrics); - - var metricsEnumerable = overallMetrics.AsEnumerable(env, true, ignoreMissingColumns: true); - if (!metricsEnumerable.GetEnumerator().MoveNext()) - { - throw env.Except("The overall ClusteringMetrics didn't have any rows."); - } - - var metrics = new List(); - foreach (var metric in metricsEnumerable) - { - metrics.Add(new ClusterMetrics() - { - AvgMinScore = metric.AvgMinScore, - Nmi = metric.Nmi, - Dbi = metric.Dbi, - RowTag = metric.RowTag, - }); - } - - return metrics; - } - - /// - /// Davies-Bouldin Index. - /// - /// - /// DBI is a measure of the how much scatter is in the cluster and the cluster separation. - /// - public double Dbi { get; private set; } - - /// - /// Normalized Mutual Information - /// - /// - /// NMI is a measure of the mutual dependence between the true and predicted cluster labels for instances in the dataset. - /// NMI ranges between 0 and 1 where "0" indicates clustering is random and "1" indicates clustering is perfect w.r.t true labels. - /// - public double Nmi { get; private set; } - - /// - /// Average minimum score. 
- /// - /// - /// AvgMinScore is the average squared-distance of examples from the respective cluster centroids. - /// It is defined as - /// AvgMinScore = (1/m) * sum ((xi - c(xi))^2) - /// where m is the number of instances in the dataset. - /// xi is the i'th instance and c(xi) is the centriod of the predicted cluster for xi. - /// - public double AvgMinScore { get; private set; } - - /// - /// For cross-validation, this is equal to "Fold N" for per-fold metric rows, "Overall" for the average metrics and "STD" for standard deviation. - /// For non-CV scenarios, this is equal to null - /// - public string RowTag { get; private set; } - - /// - /// This class contains the public fields necessary to deserialize from IDataView. - /// - private sealed class SerializationClass - { -#pragma warning disable 649 // never assigned - [ColumnName(ClusteringEvaluator.Dbi)] - public Double Dbi; - - [ColumnName(ClusteringEvaluator.Nmi)] - public Double Nmi; - - [ColumnName(ClusteringEvaluator.AvgMinScore)] - public Double AvgMinScore; - - [ColumnName(ColumnNames.FoldIndex)] - public string RowTag; -#pragma warning restore 649 // never assigned - } - } -} diff --git a/src/Microsoft.ML.Legacy/Models/ConfusionMatrix.cs b/src/Microsoft.ML.Legacy/Models/ConfusionMatrix.cs deleted file mode 100644 index 0931e2ff7e..0000000000 --- a/src/Microsoft.ML.Legacy/Models/ConfusionMatrix.cs +++ /dev/null @@ -1,151 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using System.Collections.Generic; -using Microsoft.ML.Data; - -namespace Microsoft.ML.Legacy.Models -{ - /// - /// The confusion matrix shows the predicted values vs the actual values. - /// Each row of the matrix represents the instances in a predicted class - /// while each column represents the instances in the actual class. 
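The `AvgMinScore` formula in the `ClusterMetrics` documentation above is simply the mean squared distance from each example to the centroid of its predicted cluster. A minimal sketch with invented 2-D points and centroids:

```csharp
using System;
using System.Linq;

internal static class AvgMinScoreSketch
{
    // AvgMinScore = (1/m) * sum over examples of the squared distance to the assigned centroid.
    public static double AvgMinScore(double[][] points, double[][] centroids, int[] assignment)
    {
        return points
            .Select((x, i) => SquaredDistance(x, centroids[assignment[i]]))
            .Average();
    }

    private static double SquaredDistance(double[] a, double[] b) =>
        a.Zip(b, (ai, bi) => (ai - bi) * (ai - bi)).Sum();

    public static void Main()
    {
        var points = new[] { new[] { 0.0, 0.0 }, new[] { 1.0, 0.0 }, new[] { 9.0, 9.0 } };
        var centroids = new[] { new[] { 0.5, 0.0 }, new[] { 9.0, 9.0 } };
        var assignment = new[] { 0, 0, 1 };
        Console.WriteLine(AvgMinScore(points, centroids, assignment)); // ~0.167
    }
}
```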
- /// - [Obsolete] - public sealed class ConfusionMatrix - { - private readonly double[,] _elements; - private readonly string[] _classNames; - private readonly Lazy> _classNameIndex; - - private ConfusionMatrix(double[,] elements, string[] classNames) - { - Contracts.AssertValue(elements, nameof(elements)); - Contracts.Assert(elements.GetLength(0) == elements.GetLength(1), $"{nameof(elements)} must be a square matrix."); - Contracts.AssertValue(classNames, nameof(classNames)); - Contracts.Assert(classNames.Length == elements.GetLength(0)); - - _elements = elements; - _classNames = classNames; - _classNameIndex = new Lazy>(() => - { - Dictionary result = new Dictionary(); - for (int i = 0; i < _classNames.Length; i++) - { - result[_classNames[i]] = i; - } - return result; - }); - } - - internal static List Create(IHostEnvironment env, IDataView confusionMatrix) - { - Contracts.AssertValue(env); - env.AssertValue(confusionMatrix); - - if (!confusionMatrix.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.Count, out int countColumn)) - { - throw env.Except($"ConfusionMatrix data view did not contain a {nameof(MetricKinds.ColumnNames.Count)} column."); - } - - RowCursor cursor = confusionMatrix.GetRowCursor(col => col == countColumn); - var slots = default(VBuffer>); - confusionMatrix.Schema[countColumn].Metadata.GetValue(MetadataUtils.Kinds.SlotNames, ref slots); - var slotsValues = slots.GetValues(); - string[] classNames = new string[slotsValues.Length]; - for (int i = 0; i < slotsValues.Length; i++) - { - classNames[i] = slotsValues[i].ToString(); - } - - ColumnType type = confusionMatrix.Schema[countColumn].Type; - env.Assert(type.IsVector); - ValueGetter> countGetter = cursor.GetGetter>(countColumn); - VBuffer countValues = default; - List confusionMatrices = new List(); - - int valuesRowIndex = 0; - double[,] elements = null; - while (cursor.MoveNext()) - { - if(valuesRowIndex == 0) - elements = new double[type.VectorSize, type.VectorSize]; - - countGetter(ref countValues); - ReadOnlySpan values = countValues.GetValues(); - for (int i = 0; i < values.Length; i++) - { - elements[valuesRowIndex, i] = values[i]; - } - - valuesRowIndex++; - - if(valuesRowIndex == type.VectorSize) - { - valuesRowIndex = 0; - confusionMatrices.Add(new ConfusionMatrix(elements, classNames)); - } - } - - return confusionMatrices; - } - - /// - /// Gets the number of rows or columns in the matrix. - /// - public int Order => _classNames.Length; - - /// - /// Gets the class names of the confusion matrix in the same - /// order as the rows/columns. - /// - public IReadOnlyList ClassNames => _classNames; - - /// - /// Obtains the value at the specified indices. - /// - /// - /// The row index to retrieve. - /// - /// - /// The column index to retrieve. - /// - public double this[int x, int y] - { - get - { - if (x < 0 || x >= Order) - throw new ArgumentOutOfRangeException(nameof(x)); - if (y < 0 || y >= Order) - throw new ArgumentOutOfRangeException(nameof(y)); - - return _elements[x, y]; - } - } - - /// - /// Obtains the value for the specified class names. - /// - /// - /// The name of the class for which row to retrieve. - /// - /// - /// The name of the class for which column to retrieve. 
- /// - public double this[string x, string y] - { - get - { - if (!_classNameIndex.Value.TryGetValue(x, out int xIndex)) - throw new ArgumentOutOfRangeException(nameof(x)); - - if (!_classNameIndex.Value.TryGetValue(y, out int yIndex)) - throw new ArgumentOutOfRangeException(nameof(y)); - - return this[xIndex, yIndex]; - } - } - } -} diff --git a/src/Microsoft.ML.Legacy/Models/OneVersusAll.cs b/src/Microsoft.ML.Legacy/Models/OneVersusAll.cs deleted file mode 100644 index 8020cebaa8..0000000000 --- a/src/Microsoft.ML.Legacy/Models/OneVersusAll.cs +++ /dev/null @@ -1,87 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using Microsoft.ML.Data; -using Microsoft.ML.EntryPoints; -using static Microsoft.ML.EntryPoints.CommonInputs; - -namespace Microsoft.ML.Legacy.Models -{ - public sealed partial class OneVersusAll - { - /// - /// One-versus-all, OvA, learner (also known as One-vs.-rest, "OvR") is a multi-class learner - /// with the strategy to fit one binary classifier per class in the dataset. - /// It trains the provided binary classifier for each class against the other classes, where the current - /// class is treated as the positive labels and examples in other classes are treated as the negative classes. - /// See wikipedia page. - /// - /// - /// In order to use it all you need to do is add it to pipeline as regular learner: - /// - /// pipeline.Add(OneVersusAll.With(new StochasticDualCoordinateAscentBinaryClassifier())); - /// - /// - /// The base trainer must be a binary classifier. To check the available binary classifiers, type BinaryClassifiers, - /// and look at the available binary learners as suggested by IntelliSense. - /// - /// Underlying binary trainer - /// "Use probabilities (vs. raw outputs) to identify top-score category. - /// By specifying it to false, you can tell One-versus-all to not use the probabilities but instead - /// the raw uncalibrated scores from each predictor. This is generally not recommended, since these quantities - /// are not meant to be comparable from one predictor to another, unlike calibrated probabilities. 
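The one-versus-all strategy described above trains one binary scorer per class and predicts the class whose scorer responds most strongly; with `useProbabilities = false` the raw scores are compared directly, which is why the remarks warn they may not be comparable across predictors. A framework-free sketch of the prediction side only; the delegate-based "scorers" stand in for trained binary classifiers and are purely illustrative:

```csharp
using System;
using System.Linq;

internal static class OneVersusAllSketch
{
    // One scoring function per class; a higher output means "more likely this class".
    public static int PredictClass(Func<double[], double>[] perClassScorers, double[] features)
    {
        var scores = perClassScorers.Select(score => score(features)).ToArray();
        int best = 0;
        for (int c = 1; c < scores.Length; c++)
            if (scores[c] > scores[best])
                best = c;
        return best;
    }

    public static void Main()
    {
        // Three toy "binary classifiers", one per class.
        Func<double[], double>[] scorers =
        {
            x => -x[0],        // class 0 prefers small x[0]
            x => x[0] - 1.0,   // class 1 prefers x[0] near or above 1
            x => x[1],         // class 2 prefers large x[1]
        };
        Console.WriteLine(PredictClass(scorers, new[] { 0.2, 3.0 })); // 2
    }
}
```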
- public static ILearningPipelineItem With(ITrainerInputWithLabel trainer, bool useProbabilities = true) - { - return new OvaPipelineItem(trainer, useProbabilities); - } - - private class OvaPipelineItem : ILearningPipelineItem - { - private Var _data; - private ITrainerInputWithLabel _trainer; - private bool _useProbabilities; - - public OvaPipelineItem(ITrainerInputWithLabel trainer, bool useProbabilities) - { - _trainer = trainer; - _useProbabilities = useProbabilities; - } - - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - var env = new MLContext(); - var subgraph = env.CreateExperiment(); - subgraph.Add(_trainer); - var ova = new OneVersusAll(); - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(OneVersusAll)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - _data = dataStep.Data; - ova.TrainingData = dataStep.Data; - ova.UseProbabilities = _useProbabilities; - ova.Nodes = subgraph; - } - Output output = experiment.Add(ova); - return new OvaPipelineStep(output); - } - - public Var GetInputData() => _data; - } - - private class OvaPipelineStep : ILearningPipelinePredictorStep - { - public OvaPipelineStep(Output output) - { - Model = output.PredictorModel; - } - - public Var Model { get; } - } - } -} diff --git a/src/Microsoft.ML.Legacy/Models/OnnxConverter.cs b/src/Microsoft.ML.Legacy/Models/OnnxConverter.cs deleted file mode 100644 index 96d74db10e..0000000000 --- a/src/Microsoft.ML.Legacy/Models/OnnxConverter.cs +++ /dev/null @@ -1,80 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -namespace Microsoft.ML.Legacy.Models -{ - public sealed partial class OnnxConverter - { - /// - /// ONNX is an intermediate representation format - /// for machine learning models. - /// - /// - /// It is used to make models portable such that you can - /// train a model using a toolkit and run it in another tookit's runtime, for example, - /// you can create a model using ML.NET, export it to an ONNX-ML model file, - /// then load and run that ONNX-ML model in Windows ML, on an UWP Windows 10 app. - /// - /// This API converts an ML.NET model to ONNX-ML format by inspecting the transform pipeline - /// from the end, checking for components that know how to save themselves as ONNX. - /// The first item in the transform pipeline that does not know how to save itself - /// as ONNX, is considered the "input" to the ONNX pipeline. (Ideally this would be the - /// original loader itself, but this may not be possible if the user used unsavable - /// transforms in defining the pipe.) All the columns in the source that are a type the - /// ONNX knows how to deal with will be tracked. Intermediate transformations of the - /// data appearing as new columns will appear in the output block of the ONNX, with names - /// derived from the corresponding column names. The ONNX JSON will be serialized to a - /// path defined through the Json option. - /// - /// This API supports the following arguments: - /// - /// indicates the file to write the ONNX protocol buffer file to. This is required. - /// indicates the file to write the JSON representation of the ONNX model. This is optional. - /// indicates the name property in the ONNX model. 
If left unspecified, it will - /// be the extension-less name of the file specified in the onnx indicates the protocol buffer file - /// to write the ONNX representation to. - /// indicates the domain name of the model. ONNX uses reverse domain name space indicators. - /// For example com.microsoft.cognitiveservices. This is a required field. - /// is a string array of input column names to omit from the input mapping. - /// A common scenario might be to drop the label column, for instance, since it may not be practically - /// useful for the pipeline. Note that any columns depending on these naturally cannot be saved. - /// is similar, except for the output schema. Note that the pipeline handler - /// is currently not intelligent enough to drop intermediate calculations that produce this value: this will - /// merely omit that value from the actual output. - /// - /// - /// Transforms that can be exported to ONNX - /// - /// Concat - /// KeyToVector - /// NAReplace - /// Normalize - /// Term - /// Categorical - /// - /// - /// Learners that can be exported to ONNX - /// - /// FastTree - /// LightGBM - /// Logistic Regression - /// - /// - /// See OnnxTests.cs - /// for an example on how to train a model and then convert that model to ONNX. - /// - /// Model that needs to be converted to ONNX format. - public void Convert(PredictionModel model) - { - var environment = new MLContext(); - environment.CheckValue(model, nameof(model)); - - Experiment experiment = environment.CreateExperiment(); - experiment.Add(this); - experiment.Compile(); - experiment.SetInput(Model, model.PredictorModel); - experiment.Run(); - } - } -} diff --git a/src/Microsoft.ML.Legacy/Models/RegressionEvaluator.cs b/src/Microsoft.ML.Legacy/Models/RegressionEvaluator.cs deleted file mode 100644 index 9c57886401..0000000000 --- a/src/Microsoft.ML.Legacy/Models/RegressionEvaluator.cs +++ /dev/null @@ -1,69 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using Microsoft.ML.Data; -using Microsoft.ML.Legacy.Transforms; - -namespace Microsoft.ML.Legacy.Models -{ - public sealed partial class RegressionEvaluator - { - /// - /// Computes the quality metrics for the PredictionModel using the specified data set. - /// - /// - /// The trained PredictionModel to be evaluated. - /// - /// - /// The test data that will be predicted and used to evaluate the model. - /// - /// - /// A RegressionMetrics instance that describes how well the model performed against the test data. 
- /// - public RegressionMetrics Evaluate(PredictionModel model, ILearningPipelineLoader testData) - { - var environment = new MLContext(); - environment.CheckValue(model, nameof(model)); - environment.CheckValue(testData, nameof(testData)); - - Experiment experiment = environment.CreateExperiment(); - - ILearningPipelineStep testDataStep = testData.ApplyStep(previousStep: null, experiment); - if (!(testDataStep is ILearningPipelineDataStep testDataOutput)) - { - throw environment.Except($"The {nameof(ILearningPipelineLoader)} did not return a {nameof(ILearningPipelineDataStep)} from ApplyStep."); - } - - var datasetScorer = new DatasetTransformScorer - { - Data = testDataOutput.Data, - }; - DatasetTransformScorer.Output scoreOutput = experiment.Add(datasetScorer); - - Data = scoreOutput.ScoredData; - Output evaluteOutput = experiment.Add(this); - - experiment.Compile(); - - experiment.SetInput(datasetScorer.TransformModel, model.PredictorModel); - testData.SetInput(environment, experiment); - - experiment.Run(); - - IDataView overallMetrics = experiment.GetOutput(evaluteOutput.OverallMetrics); - - if (overallMetrics == null) - { - throw environment.Except($"Could not find OverallMetrics in the results returned in {nameof(RegressionEvaluator)} Evaluate."); - } - - var metric = RegressionMetrics.FromOverallMetrics(environment, overallMetrics); - - if (metric.Count != 1) - throw environment.Except($"Exactly one metric set was expected but found {metric.Count} metrics"); - - return metric[0]; - } - } -} diff --git a/src/Microsoft.ML.Legacy/Models/RegressionMetrics.cs b/src/Microsoft.ML.Legacy/Models/RegressionMetrics.cs deleted file mode 100644 index b98578a3a5..0000000000 --- a/src/Microsoft.ML.Legacy/Models/RegressionMetrics.cs +++ /dev/null @@ -1,126 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using System.Collections.Generic; -using Microsoft.ML.Data; -using static Microsoft.ML.Data.MetricKinds; - -namespace Microsoft.ML.Legacy.Models -{ - /// - /// This class contains the overall metrics computed by regression evaluators. - /// - [Obsolete] - public sealed class RegressionMetrics - { - private RegressionMetrics() - { - } - - internal static List FromOverallMetrics(IHostEnvironment env, IDataView overallMetrics) - { - Contracts.AssertValue(env); - env.AssertValue(overallMetrics); - - var metricsEnumerable = overallMetrics.AsEnumerable(env, true, ignoreMissingColumns: true); - if (!metricsEnumerable.GetEnumerator().MoveNext()) - { - throw env.Except("The overall RegressionMetrics didn't have any rows."); - } - - List metrics = new List(); - foreach (var metric in metricsEnumerable) - { - metrics.Add(new RegressionMetrics() - { - L1 = metric.L1, - L2 = metric.L2, - Rms = metric.Rms, - LossFn = metric.LossFn, - RSquared = metric.RSquared, - RowTag = metric.RowTag, - }); - } - - return metrics; - } - - /// - /// Gets the absolute loss of the model. - /// - /// - /// The absolute loss is defined as - /// L1 = (1/m) * sum( abs( yi - y'i)) - /// where m is the number of instances in the test set. - /// y'i are the predicted labels for each instance. - /// yi are the correct labels of each instance. - /// - public double L1 { get; private set; } - - /// - /// Gets the squared loss of the model. 
- /// - /// - /// The squared loss is defined as - /// L2 = (1/m) * sum(( yi - y'i)^2) - /// where m is the number of instances in the test set. - /// y'i are the predicted labels for each instance. - /// yi are the correct labels of each instance. - /// - public double L2 { get; private set; } - - /// - /// Gets the root mean square loss (or RMS) which is the square root of the L2 loss. - /// - public double Rms { get; private set; } - - /// - /// Gets the user defined loss function. - /// - /// - /// This is the average of a loss function defined by the user, - /// computed over all the instances in the test set. - /// - public double LossFn { get; private set; } - - /// - /// Gets the R squared value of the model, which is also known as - /// the coefficient of determination​. - /// - public double RSquared { get; private set; } - - /// - /// For cross-validation, this is equal to "Fold N" for per-fold metric rows, "Overall" for the average metrics and "STD" for standard deviation. - /// For non-CV scenarios, this is equal to null - /// - public string RowTag { get; private set; } - - /// - /// This class contains the public fields necessary to deserialize from IDataView. - /// - private sealed class SerializationClass - { -#pragma warning disable 649 // never assigned - [ColumnName(ML.Data.RegressionEvaluator.L1)] - public Double L1; - - [ColumnName(ML.Data.RegressionEvaluator.L2)] - public Double L2; - - [ColumnName(ML.Data.RegressionEvaluator.Rms)] - public Double Rms; - - [ColumnName(ML.Data.RegressionEvaluator.Loss)] - public Double LossFn; - - [ColumnName(ML.Data.RegressionEvaluator.RSquared)] - public Double RSquared; - - [ColumnName(ColumnNames.FoldIndex)] - public string RowTag; -#pragma warning restore 649 // never assigned - } - } -} diff --git a/src/Microsoft.ML.Legacy/PredictionModel.cs b/src/Microsoft.ML.Legacy/PredictionModel.cs deleted file mode 100644 index 251e5927ea..0000000000 --- a/src/Microsoft.ML.Legacy/PredictionModel.cs +++ /dev/null @@ -1,214 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using System.Collections.Generic; -using System.IO; -using System.Threading.Tasks; -using Microsoft.ML.Data; -using Microsoft.ML.EntryPoints; - -namespace Microsoft.ML.Legacy -{ - public class PredictionModel - { - private readonly IHostEnvironment _env; - - internal PredictionModel(Stream stream) - { - _env = new MLContext(); - AssemblyRegistration.RegisterAssemblies(_env); - PredictorModel = new TransformModelImpl(_env, stream); - } - - internal TransformModel PredictorModel { get; } - - /// - /// Returns labels that correspond to indices of the score array in the case of - /// multi-class classification problem. 
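The regression metrics defined in `RegressionMetrics` above (L1, L2, RMS, R²) are a handful of averages over prediction errors. A compact sketch with made-up labels and predictions; the R² line uses the common 1 − L2/variance formulation as an assumption, since the deleted docs only name the metric:

```csharp
using System;
using System.Linq;

internal static class RegressionMetricsSketch
{
    public static void Main()
    {
        double[] labels = { 3.0, 5.0, 7.0, 9.0 };
        double[] preds  = { 2.5, 5.5, 6.0, 9.5 };

        // L1 = (1/m) * sum(|y_i - y'_i|)
        double l1 = labels.Zip(preds, (y, p) => Math.Abs(y - p)).Average();
        // L2 = (1/m) * sum((y_i - y'_i)^2)
        double l2 = labels.Zip(preds, (y, p) => (y - p) * (y - p)).Average();
        // RMS is the square root of L2.
        double rms = Math.Sqrt(l2);
        // R^2 = 1 - L2 / variance(labels)  (coefficient of determination).
        double mean = labels.Average();
        double variance = labels.Select(y => (y - mean) * (y - mean)).Average();
        double rSquared = 1.0 - l2 / variance;

        Console.WriteLine($"L1={l1:F3} L2={l2:F3} RMS={rms:F3} R2={rSquared:F3}");
    }
}
```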
- /// - /// Label to score mapping - /// Name of the score column - /// - public bool TryGetScoreLabelNames(out string[] names, string scoreColumnName = DefaultColumnNames.Score) - { - names = null; - var schema = PredictorModel.OutputSchema; - int colIndex = -1; - if (!schema.TryGetColumnIndex(scoreColumnName, out colIndex)) - return false; - - int expectedLabelCount = schema[colIndex].Type.ValueCount; - if (!schema[colIndex].HasSlotNames(expectedLabelCount)) - return false; - - VBuffer> labels = default; - schema[colIndex].Metadata.GetValue(MetadataUtils.Kinds.SlotNames, ref labels); - - if (labels.Length != expectedLabelCount) - return false; - - names = new string[expectedLabelCount]; - int index = 0; - foreach (var label in labels.DenseValues()) - names[index++] = label.ToString(); - - return true; - } - - /// - /// Read model from file asynchronously. - /// - /// Path to the file - /// Model - public static Task ReadAsync(string path) - { - if (string.IsNullOrEmpty(path)) - throw new ArgumentNullException(nameof(path)); - - using (var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read)) - { - return ReadAsync(stream); - } - } - - /// - /// Read model from stream asynchronously. - /// - /// Stream with model - /// Model - public static Task ReadAsync(Stream stream) - { - if (stream == null) - throw new ArgumentNullException(nameof(stream)); - return Task.FromResult(new PredictionModel(stream)); - } - - /// - /// Read generic model from file. - /// - /// Type for incoming data - /// Type for output data - /// Path to the file - /// Model - public static Task> ReadAsync(string path) - where TInput : class - where TOutput : class, new() - { - if (string.IsNullOrEmpty(path)) - throw new ArgumentNullException(nameof(path)); - - using (var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read)) - { - return ReadAsync(stream); - } - } - - /// - /// Read generic model from file. - /// - /// Type for incoming data - /// Type for output data - /// Stream with model - /// Model - public static Task> ReadAsync(Stream stream) - where TInput : class - where TOutput : class, new() - { - if (stream == null) - throw new ArgumentNullException(nameof(stream)); - - var environment = new MLContext(); - AssemblyRegistration.RegisterAssemblies(environment); - - BatchPredictionEngine predictor = - environment.CreateBatchPredictionEngine(stream); - - return Task.FromResult(new PredictionModel(predictor, stream)); - } - - /// - /// Run prediction on top of IDataView. - /// - /// Incoming IDataView - /// IDataView which contains predictions - public IDataView Predict(IDataView input) => PredictorModel.Apply(_env, input); - - /// - /// Save model to file. - /// - /// File to save model - /// - public Task WriteAsync(string path) - { - if (string.IsNullOrEmpty(path)) - throw new ArgumentNullException(nameof(path)); - - using (var stream = new FileStream(path, FileMode.Create, FileAccess.Write, FileShare.Read)) - { - return WriteAsync(stream); - } - } - - /// - /// Save model to stream. - /// - /// Stream to save model. 
- /// - public Task WriteAsync(Stream stream) - { - if (stream == null) - throw new ArgumentNullException(nameof(stream)); - PredictorModel.Save(_env, stream); - return Task.CompletedTask; - } - } - - public class PredictionModel : PredictionModel - where TInput : class - where TOutput : class, new() - { - private BatchPredictionEngine _predictor; - - internal PredictionModel(BatchPredictionEngine predictor, Stream stream) - : base(stream) - { - _predictor = predictor; - } - - /// - /// Run prediction for the TInput data. - /// - /// Input data - /// Result of prediction - public TOutput Predict(TInput input) - { - int count = 0; - TOutput result = null; - foreach (var item in _predictor.Predict(new[] { input }, reuseRowObjects: false)) - { - if (count == 0) - result = item; - - count++; - if (count > 1) - break; - } - - if (count > 1) - throw new InvalidOperationException("Prediction pipeline must return at most one prediction per example."); - return result; - } - - /// - /// Run prediction for collection of inputs. - /// - /// Input data - /// Result of prediction - public IEnumerable Predict(IEnumerable inputs) - { - return _predictor.Predict(inputs, reuseRowObjects: false); - } - } -} \ No newline at end of file diff --git a/src/Microsoft.ML.Legacy/Runtime/EntryPoints/CodeGen/EntryPointGeneratorBase.cs b/src/Microsoft.ML.Legacy/Runtime/EntryPoints/CodeGen/EntryPointGeneratorBase.cs deleted file mode 100644 index 62c45a3a58..0000000000 --- a/src/Microsoft.ML.Legacy/Runtime/EntryPoints/CodeGen/EntryPointGeneratorBase.cs +++ /dev/null @@ -1,136 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. 
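The deleted `Predict(TInput)` method above funnels a single example through the batch prediction path and throws if more than one prediction comes back. A framework-free sketch of that guard; the generic helper and the toy "batch predictor" below are illustrative, not the legacy types themselves:

```csharp
using System;
using System.Collections.Generic;
using System.Linq;

internal static class SinglePredictionGuardSketch
{
    // Mirrors the deleted single-example Predict logic: run the batch path on a
    // one-element array and insist on getting at most one result back.
    public static TOutput PredictSingle<TInput, TOutput>(
        Func<IEnumerable<TInput>, IEnumerable<TOutput>> predictBatch, TInput input)
        where TOutput : class
    {
        TOutput result = null;
        int count = 0;
        foreach (var item in predictBatch(new[] { input }))
        {
            if (count == 0)
                result = item;
            if (++count > 1)
                throw new InvalidOperationException(
                    "Prediction pipeline must return at most one prediction per example.");
        }
        return result;
    }

    public static void Main()
    {
        // A toy "batch predictor" that doubles each input and returns it as text.
        Console.WriteLine(PredictSingle<int, string>(xs => xs.Select(x => (2 * x).ToString()), 21)); // 42
    }
}
```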
- -using System; -using System.CodeDom.Compiler; -using Microsoft.ML.CommandLine; - -namespace Microsoft.ML.EntryPoints.CodeGen -{ - internal abstract class EntryPointGeneratorBase : GeneratorBase - { - protected override void GenerateContent(IndentedTextWriter writer, string prefix, ComponentCatalog.LoadableClassInfo component, string moduleId) - { - GenerateSummaryComment(writer, component); - GenerateReturnComment(writer); - GenerateModuleAttribute(writer, prefix, component, moduleId); - GenerateOutputPort(writer); - GenerateModuleType(writer, component); - writer.WriteLine("[Obsolete]"); - GenerateMethodSignature(writer, prefix, component); - GenerateImplCall(writer, prefix, component); - } - - protected abstract void GenerateSummaryComment(IndentedTextWriter w, ComponentCatalog.LoadableClassInfo component); - - protected void GenerateSummaryComment(IndentedTextWriter w, CmdParser.ArgInfo.Arg arg, string argSuffix) - { - if (Exclude.Contains(arg.LongName)) - return; - - GenerateParameterComment(w, arg.LongName + argSuffix, arg.HelpText); - } - - protected void GenerateParameterComment(IndentedTextWriter w, string name, string description) - { - w.WriteLine("/// {1}", name, description); - } - - protected abstract void GenerateReturnComment(IndentedTextWriter w); - - protected abstract void GenerateModuleAttribute(IndentedTextWriter w, string prefix, ComponentCatalog.LoadableClassInfo component, string moduleId); - - protected abstract void GenerateOutputPort(IndentedTextWriter w); - - protected void GenerateModuleType(IndentedTextWriter w, ComponentCatalog.LoadableClassInfo component) - { - string cat; - if (component.IsOfType(typeof(SignatureBinaryClassifierTrainer))) - cat = "BinaryClassifier"; - else if (component.IsOfType(typeof(SignatureMultiClassClassifierTrainer))) - cat = "MultiClassClassifier"; - else if (component.IsOfType(typeof(SignatureRegressorTrainer))) - cat = "Regression"; - else if (component.IsOfType(typeof(SignatureAnomalyDetectorTrainer))) - cat = "AnomalyDetector"; - else - cat = "None"; - w.WriteLine("[DataLabModuleType(Type = ModuleType.{0})]", cat); - } - - protected abstract void GenerateMethodSignature(IndentedTextWriter w, string prefix, ComponentCatalog.LoadableClassInfo component); - - protected void GenerateMethodSignature(IndentedTextWriter w, CmdParser.ArgInfo.Arg arg, string parent, string parentType, string parentValue, ref string linePrefix, string argSuffix) - { - if (Exclude.Contains(arg.LongName)) - return; - - w.WriteLine(linePrefix); - linePrefix = ","; - if (IsColumnType(arg)) - { - GenerateDataLabParameterAttribute(w, arg.LongName, false, arg.LongName, null, arg.HelpText, parent, parentType, parentValue); - GenerateParameter(w, "string", arg.LongName + argSuffix); - } - else - { - GenerateDataLabParameterAttribute(w, arg.LongName, IsOptional(arg), arg.LongName, Stringify(arg.DefaultValue), arg.HelpText, parent, parentType, parentValue); - GenerateParameter(w, GetCSharpTypeName(arg.ItemType), arg.LongName + argSuffix); - } - } - - protected void GenerateDataLabParameterAttribute(IndentedTextWriter w, string friendlyName, - bool isOptional, string displayName, object defaultValue, string description, string parent = null, - string parentType = null, string parentValue = null) - { - string p = parent != null ? string.Format(" ParentParameter = \"{0}\",", parent) : ""; - string pv = parentValue != null - ? string.Format(" ParentParameterValue = new object[] {{ {0}.{1} }},", parentType, parentValue) - : ""; - string dv = defaultValue != null ? 
string.Format(" DefaultValue = {0},", defaultValue) : ""; - w.WriteLine( - "[DataLabParameter(FriendlyName = \"{0}\", IsOptional = {1}, DisplayName = \"{2}\",{3}{4}{5} Description = \"{6}\")]", - friendlyName, isOptional ? "true" : "false", displayName, p, pv, dv, description); - } - - protected abstract void GenerateImplCall(IndentedTextWriter w, string prefix, ComponentCatalog.LoadableClassInfo component); - - protected void GenerateImplCall(IndentedTextWriter w, CmdParser.ArgInfo.Arg arg, string argSuffix) - { - if (Exclude.Contains(arg.LongName)) - return; - - if (IsColumnType(arg)) - w.WriteLine("builder.{0} = {1}.Split('|');", Capitalize(arg.LongName + argSuffix), arg.LongName + argSuffix); - else - w.WriteLine("builder.{0} = {1}{2};", Capitalize(arg.LongName + argSuffix), CastIfNeeded(arg), arg.LongName + argSuffix); - } - - protected override string GetCSharpTypeName(Type type) - { - if (type.IsGenericType && type.GetGenericTypeDefinition() == typeof(Nullable<>)) - return GetCSharpTypeName(type.GetGenericArguments()[0]) + "?"; - - // REVIEW: How are long/uint params going to be surfaced in AML? - // long and uint params are not supported, falling back to int - if (type == typeof(long) || type == typeof(uint)) - return "int"; - - return base.GetCSharpTypeName(type); - } - - protected override void GenerateUsings(IndentedTextWriter w) - { - w.WriteLine("using System;"); - w.WriteLine("using System.Diagnostics.CodeAnalysis;"); - w.WriteLine("using System.Linq;"); - w.WriteLine("using Microsoft.Analytics.MachineLearning;"); - w.WriteLine("using Microsoft.Analytics.Modules;"); - w.WriteLine("using Microsoft.ML;"); - w.WriteLine("using Microsoft.ML.CommandLine;"); - w.WriteLine("using Microsoft.ML.Data;"); - w.WriteLine("using Microsoft.ML.Internal.Internallearn;"); - } - } -} \ No newline at end of file diff --git a/src/Microsoft.ML.Legacy/Runtime/EntryPoints/CodeGen/GeneratorBase.cs b/src/Microsoft.ML.Legacy/Runtime/EntryPoints/CodeGen/GeneratorBase.cs deleted file mode 100644 index 8a0effcad6..0000000000 --- a/src/Microsoft.ML.Legacy/Runtime/EntryPoints/CodeGen/GeneratorBase.cs +++ /dev/null @@ -1,214 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using System.CodeDom; -using System.CodeDom.Compiler; -using System.Collections.Generic; -using System.Reflection; -using Microsoft.CSharp; -using Microsoft.ML.CommandLine; -using Microsoft.ML.Data; -using Microsoft.ML.Internal.Utilities; -using Microsoft.ML.Transforms; - -namespace Microsoft.ML.EntryPoints.CodeGen -{ - internal abstract class GeneratorBase - { - protected string Name; - protected string Owner; - protected string Version; - protected string State; - protected string ModuleType; - protected string Determinism; - protected string Category; - protected HashSet Exclude; - protected HashSet Namespaces; - - /// - /// Generate the module and its implementation. - /// - /// The writer. - /// The module prefix. - /// The command string used to generate. - /// The component. 
- /// - /// - /// - /// - /// - /// - /// - /// - /// The set of parameters to exclude - /// The set of extra namespaces - public void Generate(IndentedTextWriter writer, string prefix, string regenerate, ComponentCatalog.LoadableClassInfo component, - string moduleId, string moduleName, string moduleOwner, string moduleVersion, string moduleState, string moduleType, string moduleDeterminism, string moduleCategory, - HashSet exclude, HashSet namespaces) - { - Contracts.AssertValue(exclude); - Name = moduleName; - Owner = moduleOwner; - Version = moduleVersion; - State = moduleState; - ModuleType = moduleType; - Determinism = moduleDeterminism; - Category = moduleCategory; - Exclude = exclude; - Namespaces = namespaces; - - GenerateHeader(writer, regenerate); - using (writer.Nest()) - { - GenerateClassName(writer, prefix, component); - using (writer.Nest()) - GenerateContent(writer, prefix, component, moduleId); - GenerateFooter(writer); - } - GenerateFooter(writer); - } - - private void GenerateHeader(IndentedTextWriter w, string regenerate) - { - w.WriteLine("//------------------------------------------------------------------------------"); - w.WriteLine("// "); - w.WriteLine("// This code was generated by a tool."); - w.WriteLine("//"); - w.WriteLine("// Changes to this file may cause incorrect behavior and will be lost if"); - w.WriteLine("// the code is regenerated."); - w.WriteLine("// "); - w.WriteLine("//------------------------------------------------------------------------------"); - w.WriteLine("// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"); - w.WriteLine("// DO NOT MODIFY THIS FILE"); - w.WriteLine("// This file is generated from the TLC components, please don't modify."); - w.WriteLine("// The following command was used to generate this file: " + regenerate); - w.WriteLine("// Version used to generate this file: " + Assembly.GetExecutingAssembly().GetName().Version); - w.WriteLine("// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"); - w.WriteLine(); - w.WriteLine("namespace Microsoft.Analytics.Platform.ML.IDVUtils"); - w.WriteLine("{"); - using (w.Nest()) - GenerateUsings(w); - } - - protected abstract void GenerateUsings(IndentedTextWriter w); - - protected virtual void GenerateClassName(IndentedTextWriter w, string prefix, ComponentCatalog.LoadableClassInfo component) - { - w.WriteLine(); - var className = prefix + component.LoadNames[0]; - w.WriteLine("/// Module: {0}", className); - w.WriteLine("public partial class {0}", className); - w.WriteLine("{"); - } - - protected abstract void GenerateContent(IndentedTextWriter writer, string prefix, ComponentCatalog.LoadableClassInfo component, string moduleId); - - private void GenerateFooter(IndentedTextWriter w) - { - w.WriteLine("}"); - } - - protected virtual string EnumName(CmdParser.ArgInfo.Arg arg, Type sigType) - { - return Capitalize(arg.LongName) + ComponentCatalog.SignatureToString(sigType); - } - - protected static string Capitalize(string s) - { - if (string.IsNullOrEmpty(s)) - return s; - return char.ToUpperInvariant(s[0]) + s.Substring(1); - } - - protected bool IsColumnType(CmdParser.ArgInfo.Arg arg) - { - return - typeof(OneToOneColumn).IsAssignableFrom(arg.ItemType) || - typeof(ManyToOneColumn).IsAssignableFrom(arg.ItemType) || - arg.ItemType == typeof(GenerateNumberTransform.Column); - } - - protected bool IsStringColumnType(CmdParser.ArgInfo.Arg arg) - { - return 
arg.LongName == "column"; - } - - protected bool IsTrainer(Type sigType) - { - return - sigType == typeof(SignatureTrainer) || - sigType == typeof(SignatureBinaryClassifierTrainer) || - sigType == typeof(SignatureMultiClassClassifierTrainer) || - sigType == typeof(SignatureRegressorTrainer) || - sigType == typeof(SignatureMultiOutputRegressorTrainer) || - sigType == typeof(SignatureRankerTrainer) || - sigType == typeof(SignatureAnomalyDetectorTrainer) || - sigType == typeof(SignatureClusteringTrainer) || - sigType == typeof(SignatureSequenceTrainer); - } - - protected virtual void GenerateParameter(IndentedTextWriter w, string type, string name) - { - w.Write("{0} {1}", type, name); - } - - protected virtual string GetCSharpTypeName(Type type) - { - if (type.IsGenericType && type.GetGenericTypeDefinition() == typeof(Nullable<>)) - return GetCSharpTypeName(type.GetGenericArguments()[0]) + "?"; - - string name; - using (var p = new CSharpCodeProvider()) - name = p.GetTypeOutput(new CodeTypeReference(type)); - return name; - } - - protected bool IsOptional(CmdParser.ArgInfo.Arg arg) - { - return - arg.IsRequired && arg.DefaultValue != null || - arg.ItemType.IsGenericType && arg.ItemType.GetGenericTypeDefinition() == typeof(Nullable<>) || - arg.ItemType == typeof(string); - } - - protected static string CastIfNeeded(CmdParser.ArgInfo.Arg arg) - { - return arg.ItemType == typeof(uint) ? "(uint)" : ""; - } - - protected object Stringify(object value) - { - if (value == null) - return null; - var arr = value as Array; - if (arr != null && arr.GetLength(0) > 0) - return Stringify(arr.GetValue(0)); - var strval = value as string; - if (strval != null) - { - if (strval == "") - return "string.Empty"; - return Quote(strval); - } - if (value is double) - return ((double)value).ToString("R") + "d"; - if (value is float) - return ((float)value).ToString("R") + "f"; - if (value is bool) - return (bool)value ? "true" : "false"; - var type = value.GetType(); - if (type.IsEnum) - return GetCSharpTypeName(type) + "." + value; - return value; - } - - private string Quote(string src) - { - var dst = src.Replace("\\", "\\\\").Replace("\"", "\\\"").Replace("\n", "\\n").Replace("\r", "\\r"); - return "\"" + dst + "\""; - } - } -} \ No newline at end of file diff --git a/src/Microsoft.ML.Legacy/Runtime/EntryPoints/CodeGen/ImplGeneratorBase.cs b/src/Microsoft.ML.Legacy/Runtime/EntryPoints/CodeGen/ImplGeneratorBase.cs deleted file mode 100644 index 7bdf29276d..0000000000 --- a/src/Microsoft.ML.Legacy/Runtime/EntryPoints/CodeGen/ImplGeneratorBase.cs +++ /dev/null @@ -1,105 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. 
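`GeneratorBase.Stringify` above renders argument default values as C# literal text: round-trip ("R") formatting for floating point, `string.Empty` for empty strings, quoting and escaping for other strings, and enum qualification. A trimmed standalone sketch of the same idea (array handling and newline escaping are omitted here); it is not the original implementation:

```csharp
using System;

internal static class DefaultValueLiteralSketch
{
    // Renders a default value as C# source text, in the spirit of GeneratorBase.Stringify.
    public static string ToLiteral(object value)
    {
        switch (value)
        {
            case null: return null;
            case string s when s.Length == 0: return "string.Empty";
            case string s: return "\"" + s.Replace("\\", "\\\\").Replace("\"", "\\\"") + "\"";
            case double d: return d.ToString("R") + "d";
            case float f: return f.ToString("R") + "f";
            case bool b: return b ? "true" : "false";
            default:
                var type = value.GetType();
                return type.IsEnum ? type.Name + "." + value : value.ToString();
        }
    }

    public static void Main()
    {
        Console.WriteLine(ToLiteral(0.1));              // 0.1d
        Console.WriteLine(ToLiteral("a\"b"));           // "a\"b"
        Console.WriteLine(ToLiteral(DayOfWeek.Monday)); // DayOfWeek.Monday
    }
}
```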
-
-using System;
-using System.CodeDom.Compiler;
-using System.Linq;
-using Microsoft.ML.CommandLine;
-using Microsoft.ML.Internal.Utilities;
-
-namespace Microsoft.ML.EntryPoints.CodeGen
-{
-    internal abstract class ImplGeneratorBase : GeneratorBase
-    {
-        protected override void GenerateContent(IndentedTextWriter writer, string prefix, ComponentCatalog.LoadableClassInfo component, string moduleId)
-        {
-            GenerateImplFields(writer, component, (w, a) => GenerateFieldsOrProperties(w, a, "", GenerateField));
-            GenerateImplFields(writer, component, (w, a) => GenerateFieldsOrProperties(w, a, "", GenerateProperty));
-            GenerateMethodSignature(writer, prefix, component);
-            GenerateImplBody(writer, component);
-        }
-
-        protected void GenerateImplFields(IndentedTextWriter w, ComponentCatalog.LoadableClassInfo component,
-            Action<IndentedTextWriter, CmdParser.ArgInfo.Arg> fieldGenerator)
-        {
-            var argumentInfo = CmdParser.GetArgInfo(component.ArgType, component.CreateArguments());
-            var arguments = argumentInfo.Args.Where(a => !a.IsHidden).ToArray();
-            foreach (var arg in arguments)
-                fieldGenerator(w, arg);
-        }
-
-        /// <summary>
-        /// Generates private fields and public properties for all the fields in the arguments.
-        /// Recursively generate fields and properties for subcomponents.
-        /// </summary>
-        protected void GenerateFieldsOrProperties(IndentedTextWriter w, CmdParser.ArgInfo.Arg arg, string argSuffix,
-            Action<IndentedTextWriter, string, string, string, bool, string> oneFieldGenerator)
-        {
-            if (Exclude.Contains(arg.LongName))
-                return;
-
-            object val = Stringify(arg.DefaultValue);
-            string defVal = val == null ? "" : string.Format(" = {0}", val);
-            var typeName = IsColumnType(arg) ? "string[]" : IsStringColumnType(arg) ? GetCSharpTypeName(arg.Field.FieldType) : GetCSharpTypeName(arg.ItemType);
-            oneFieldGenerator(w, typeName, arg.LongName + argSuffix, defVal, arg.ItemType == typeof(bool), arg.HelpText);
-        }
-
-        protected static void GenerateField(IndentedTextWriter w, string typeName, string argName, string defVal,
-            bool isBool, string helpText)
-        {
-            w.WriteLine("private {0} {1}{2};", typeName, argName, defVal);
-            w.WriteLine();
-        }
-
-        protected static void GenerateProperty(IndentedTextWriter w, string typeName, string argName, string defVal,
-            bool isBool, string helpText)
-        {
-            var help = helpText ?? argName;
-            help = help.Replace("&", "&amp;").Replace("<", "&lt;").Replace(">", "&gt;");
-            w.WriteLine("/// <summary>Gets or sets {0}{1} </summary>", isBool ? "a value indicating whether " : "", help);
-            w.WriteLine("public {0} {1}", typeName, Capitalize(argName));
-            w.WriteLine("{");
-            using (w.Nest())
-            {
-                w.WriteLine("get {{ return {0}; }}", argName);
-                w.WriteLine("set {{ {0} = value; }}", argName);
-            }
-            w.WriteLine("}");
-            w.WriteLine();
-        }
-
-        protected abstract void GenerateMethodSignature(IndentedTextWriter w, string prefix,
-            ComponentCatalog.LoadableClassInfo component);
-
-        protected abstract void GenerateImplBody(IndentedTextWriter w, ComponentCatalog.LoadableClassInfo component);
-
-        protected void GenerateImplBody(IndentedTextWriter w, CmdParser.ArgInfo.Arg arg, string argSuffix)
-        {
-            if (Exclude.Contains(arg.LongName))
-                return;
-
-            if (arg.IsCollection)
-            {
-                if (IsColumnType(arg))
-                    w.WriteLine("args{0}.{1} = {1}.Select({2}.Parse).ToArray();", argSuffix, arg.LongName, GetCSharpTypeName(arg.ItemType));
-                else if (IsStringColumnType(arg))
-                    w.WriteLine("args{0}.{1} = {2};", argSuffix, arg.LongName, arg.LongName + argSuffix);
-                else
-                    w.WriteLine("args{0}.{1} = new[] {{ {2} }};", argSuffix, arg.LongName, arg.LongName + argSuffix);
-            }
-            else
-                w.WriteLine("args{0}.{1} = {2};", argSuffix, arg.LongName, arg.LongName + argSuffix);
-        }
-
-        protected override void GenerateUsings(IndentedTextWriter w)
-        {
-            w.WriteLine("using System;");
-            w.WriteLine("using System.Linq;");
-            w.WriteLine("using Microsoft.ML;");
-            w.WriteLine("using Microsoft.ML.CommandLine;");
-            w.WriteLine("using Microsoft.ML.Data;");
-            w.WriteLine("using Microsoft.ML.Internal.Internallearn;");
-        }
-    }
-}
\ No newline at end of file
diff --git a/src/Microsoft.ML.Legacy/Runtime/EntryPoints/CodeGen/LearnerGenerators.cs b/src/Microsoft.ML.Legacy/Runtime/EntryPoints/CodeGen/LearnerGenerators.cs
deleted file mode 100644
index 533a3fe8c4..0000000000
--- a/src/Microsoft.ML.Legacy/Runtime/EntryPoints/CodeGen/LearnerGenerators.cs
+++ /dev/null
@@ -1,137 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
- -using System; -using System.CodeDom.Compiler; -using System.IO; -using System.Linq; -using Microsoft.ML.CommandLine; -using Microsoft.ML.Internal.Utilities; - -namespace Microsoft.ML.EntryPoints.CodeGen -{ - internal class LearnerImplGenerator : ImplGeneratorBase - { - protected override void GenerateMethodSignature(IndentedTextWriter w, string prefix, ComponentCatalog.LoadableClassInfo component) - { - w.WriteLine("/// "); - w.WriteLine("/// Creates a {0}", component.LoadNames[0]); - w.WriteLine("/// "); - w.WriteLine("/// A tuple containing learner name and settings."); - w.WriteLine("[Obsolete]"); - w.WriteLine("public Tuple GetTlcSettings()"); - } - - protected override void GenerateImplBody(IndentedTextWriter w, ComponentCatalog.LoadableClassInfo component) - { - w.WriteLine("{"); - using (w.Nest()) - { - w.WriteLine("var args = new {0}();", GetCSharpTypeName(component.ArgType)); - w.WriteLine("var defs = new {0}();", GetCSharpTypeName(component.ArgType)); - var argumentInfo = CmdParser.GetArgInfo(component.ArgType, component.CreateArguments()); - var arguments = argumentInfo.Args.Where(a => !a.IsHidden).ToArray(); - foreach (var arg in arguments) - GenerateImplBody(w, arg, ""); - w.WriteLine("return new Tuple(\"{0}\", CmdParser.GetSettings(args, defs));", component.LoadNames[0]); - } - w.WriteLine("}"); - } - } - - internal sealed class LearnerEntryPointGenerator : EntryPointGeneratorBase - { - protected override void GenerateSummaryComment(IndentedTextWriter w, ComponentCatalog.LoadableClassInfo component) - { - w.WriteLine("/// "); - var desc = component.Summary ?? component.LoadNames[0]; - using (var sr = new StringReader(desc)) - { - string line; - while ((line = sr.ReadLine()) != null) - w.WriteLine("/// {0}", line); - } - w.WriteLine("/// "); - var argumentInfo = CmdParser.GetArgInfo(component.ArgType, component.CreateArguments()); - var arguments = argumentInfo.Args.Where(a => !a.IsHidden).ToArray(); - foreach (var arg in arguments) - GenerateSummaryComment(w, arg, ""); - } - - protected override void GenerateReturnComment(IndentedTextWriter w) - { - w.WriteLine("/// An untrained model."); - } - - protected override void GenerateModuleAttribute(IndentedTextWriter w, string prefix, ComponentCatalog.LoadableClassInfo component, string moduleId) - { - if (!string.IsNullOrEmpty(prefix)) - prefix += " "; - w.WriteLine("[DataLabModule(FriendlyName = \"{0}{1}\",", prefix, component.UserName); - using (w.Nest()) - { - var desc = component.Summary ?? 
component.LoadNames[0]; - w.WriteLine("Description = \"{0}\",", desc.Replace("\n", "\\n").Replace("\r", "\\r")); - string cat; - if (component.IsOfType(typeof(SignatureBinaryClassifierTrainer)) || - component.IsOfType(typeof(SignatureMultiClassClassifierTrainer))) - { - cat = @"Machine Learning\Initialize Model\Classification"; - } - else if (component.IsOfType(typeof(SignatureRegressorTrainer))) - cat = @"Machine Learning\Initialize Model\Regression"; - else if (component.IsOfType(typeof(SignatureAnomalyDetectorTrainer))) - cat = @"Machine Learning\Initialize Model\Anomaly Detection"; - else - cat = @"Machine Learning\Initialize Model"; - - w.WriteLine("Category = @\"{0}\",", cat); - w.WriteLine("IsBlocking = true,"); - w.WriteLine("IsDeterministic = true,"); - w.WriteLine("Version = \"2.0\","); - w.WriteLine("Owner = \"Microsoft Corporation\","); - w.WriteLine("FamilyId = \"{{{0}}}\",", Guid.NewGuid().ToString().ToUpperInvariant()); - w.WriteLine("ReleaseState = States.Alpha)]"); - } - } - - protected override void GenerateOutputPort(IndentedTextWriter w) - { - w.WriteLine( - "[DataLabOutputPort(FriendlyName = \"Untrained model\", DisplayName = \"Untrained model\", Position = 0, DataType = WellKnownDataTypeIds.ITrainerDotNet, Description = \"An untrained model.\")]"); - } - - protected override void GenerateMethodSignature(IndentedTextWriter w, string prefix, ComponentCatalog.LoadableClassInfo component) - { - w.WriteLine("[Obsolete]"); - w.Write("public static Tuple Create{0}{1}(", prefix, component.LoadNames[0]); - using (w.Nest()) - { - var argumentInfo = CmdParser.GetArgInfo(component.ArgType, component.CreateArguments()); - var arguments = argumentInfo.Args.Where(a => !a.IsHidden).ToArray(); - var pre = ""; - foreach (var arg in arguments) - GenerateMethodSignature(w, arg, null, null, null, ref pre, ""); - w.WriteLine(")"); - } - } - - protected override void GenerateImplCall(IndentedTextWriter w, string prefix, ComponentCatalog.LoadableClassInfo component) - { - w.WriteLine("{"); - using (w.Nest()) - { - var className = prefix + component.LoadNames[0]; - w.WriteLine("var builder = new {0}();", className); - var argumentInfo = CmdParser.GetArgInfo(component.ArgType, component.CreateArguments()); - var arguments = argumentInfo.Args.Where(a => !a.IsHidden).ToArray(); - foreach (var arg in arguments) - GenerateImplCall(w, arg, ""); - w.WriteLine("var learner = builder.GetTlcSettings();"); - w.WriteLine("return new TlcTrainer(learner.Item1, learner.Item2);"); - } - w.WriteLine("}"); - } - } -} \ No newline at end of file diff --git a/src/Microsoft.ML.Legacy/Runtime/EntryPoints/CodeGen/ModuleGenerator.cs b/src/Microsoft.ML.Legacy/Runtime/EntryPoints/CodeGen/ModuleGenerator.cs deleted file mode 100644 index 918318b266..0000000000 --- a/src/Microsoft.ML.Legacy/Runtime/EntryPoints/CodeGen/ModuleGenerator.cs +++ /dev/null @@ -1,231 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. 
- -#pragma warning disable 420 // volatile with Interlocked.CompareExchange - -using System; -using System.CodeDom.Compiler; -using System.Collections.Generic; -using System.IO; -using System.Linq; -using System.Threading; -using Microsoft.ML; -using Microsoft.ML.CommandLine; -using Microsoft.ML.Data; -using Microsoft.ML.EntryPoints.CodeGen; -using Microsoft.ML.Tools; - -[assembly: LoadableClass(typeof(ModuleGenerator), typeof(ModuleGenerator.Arguments), typeof(SignatureModuleGenerator), - "Module generator", "ModuleGenerator", "Module")] - -namespace Microsoft.ML.EntryPoints.CodeGen -{ - internal sealed class ModuleGenerator : IGenerator - { - private readonly string _modulePrefix; - private readonly bool _generateModule; - private readonly bool _generateModuleInstance; - private readonly string _regenerate; - private readonly string _moduleId; - private readonly string _moduleName; - private readonly string _moduleOwner; - private readonly string _moduleVersion; - private readonly string _moduleState; - private readonly string _moduleType; - private readonly string _moduleDeterminism; - private readonly string _moduleCategory; - private readonly HashSet _exclude; - private readonly HashSet _namespaces; - private readonly IHost _host; - - public class Arguments - { - [Argument(ArgumentType.AtMostOnce, HelpText = "The prefix for the generated AML module", ShortName = "prefix")] - public string ModulePrefix; - - [Argument(ArgumentType.AtMostOnce, HelpText = "Whether to generate the module entry point", ShortName = "module")] - public bool GenerateModule = true; - - [Argument(ArgumentType.AtMostOnce, HelpText = "Whether to generate the module entry point as Module Instance", ShortName = "moduleinst")] - public bool GenerateModuleInstance = true; - - [Argument(ArgumentType.AtMostOnce, HelpText = "The module id", ShortName = "id")] - public string ModuleId; - - [Argument(ArgumentType.AtMostOnce, HelpText = "The module name", ShortName = "name")] - public string ModuleName; - - [Argument(ArgumentType.AtMostOnce, HelpText = "The module owner", ShortName = "owner")] - public string ModuleOwner = "Microsoft"; - - [Argument(ArgumentType.AtMostOnce, HelpText = "The module version", ShortName = "version")] - public string ModuleVersion = "0.0.0.0"; - - [Argument(ArgumentType.AtMostOnce, HelpText = "The module state", ShortName = "state")] - public string ModuleState = "Alpha"; - - [Argument(ArgumentType.AtMostOnce, HelpText = "The module type", ShortName = "type")] - public string ModuleType = "User"; - - [Argument(ArgumentType.AtMostOnce, HelpText = "The module determinism", ShortName = "determinism")] - public string ModuleDeterminism = "Deterministic"; - - [Argument(ArgumentType.AtMostOnce, HelpText = "The module category", ShortName = "category")] - public string ModuleCategory = "V2\\Transforms"; - - [Argument(ArgumentType.Multiple, HelpText = "Arguments to exclude", ShortName = "excl")] - public string[] Exclude; - - [Argument(ArgumentType.Multiple, HelpText = "Extra namespaces", ShortName = "using")] - public string[] Namespaces; - } - - public ModuleGenerator(IHostEnvironment env, Arguments args, string regenerate) - { - Contracts.AssertValue(args, "args"); - Contracts.AssertNonEmpty(regenerate, "regenerate"); - - _host = env.Register("ModuleGenerator"); - _modulePrefix = args.ModulePrefix; - _regenerate = regenerate; - _generateModule = args.GenerateModule; - _generateModuleInstance = args.GenerateModuleInstance; - _moduleId = string.IsNullOrEmpty(args.ModuleId) ? 
null : args.ModuleId; - _moduleName = string.IsNullOrEmpty(args.ModuleName) ? null : args.ModuleName; - _moduleOwner = args.ModuleOwner; - _moduleVersion = args.ModuleVersion; - _moduleState = args.ModuleState; - _moduleType = args.ModuleType; - _moduleDeterminism = args.ModuleDeterminism; - _moduleCategory = args.ModuleCategory; - _exclude = new HashSet(); - foreach (var excl in args.Exclude) - { - if (!string.IsNullOrEmpty(excl)) - _exclude.Add(excl); - } - _namespaces = new HashSet(); - foreach (var ns in args.Namespaces) - { - if (!string.IsNullOrEmpty(ns)) - _namespaces.Add(ns); - } - } - - private static volatile Dictionary _entryPointGeneratorMapping; - private static volatile Dictionary _moduleInstanceEntryPointGeneratorMapping; - private static volatile Dictionary _implGeneratorMapping; - - private static Dictionary EntryPointGeneratorMapping - { - get - { - if (_entryPointGeneratorMapping == null) - { - var tmp = new Dictionary(); - tmp.Add(typeof(SignatureTrainer), new LearnerEntryPointGenerator()); - tmp.Add(typeof(SignatureDataTransform), new TransformEntryPointGenerator()); - Interlocked.CompareExchange(ref _entryPointGeneratorMapping, tmp, null); - } - return _entryPointGeneratorMapping; - } - } - - private static Dictionary ModuleInstanceEntryPointGeneratorMapping - { - get - { - if (_moduleInstanceEntryPointGeneratorMapping == null) - { - var tmp = new Dictionary(); - tmp.Add(typeof(SignatureDataTransform), new TransformModuleInstanceEntryPointGenerator()); - Interlocked.CompareExchange(ref _moduleInstanceEntryPointGeneratorMapping, tmp, null); - } - return _moduleInstanceEntryPointGeneratorMapping; - } - } - - private static Dictionary ImplGeneratorMapping - { - get - { - if (_implGeneratorMapping == null) - { - var tmp = new Dictionary(); - tmp.Add(typeof(SignatureTrainer), new LearnerImplGenerator()); - tmp.Add(typeof(SignatureDataTransform), new TransformImplGenerator()); - Interlocked.CompareExchange(ref _implGeneratorMapping, tmp, null); - } - return _implGeneratorMapping; - } - } - - public void Generate(IEnumerable infos) - { - using (var ch = _host.Start("Generate")) - { - foreach (var info in infos.Select(c => c.Info)) - GenerateFile(ch, info); - } - } - - private void GenerateFile(IChannel ch, ComponentCatalog.LoadableClassInfo info) - { - _host.AssertValue(ch); - ch.AssertValue(info); - - string name = info.LoadNames[0]; - if (!info.IsOfType(typeof(SignatureTrainer)) && !info.IsOfType(typeof(SignatureDataTransform))) - { - ch.Warning("No generator available for {0}.", name); - return; - } - - if (info.Constructor == null && info.CreateMethod == null) - { - ch.Warning("No construction method available for {0}.", name); - return; - } - - if (_generateModule) - { - var entryPointFile = _modulePrefix + name + "EntryPoint.cs"; - if (_generateModuleInstance) - GenerateFile(info, entryPointFile, ModuleInstanceEntryPointGeneratorMapping); - else - GenerateFile(info, entryPointFile, EntryPointGeneratorMapping); - } - - var implFile = _modulePrefix + name + ".cs"; - GenerateFile(info, implFile, ImplGeneratorMapping); - } - - private void GenerateFile(ComponentCatalog.LoadableClassInfo info, string filename, Dictionary mapping) - { - using (var sw = new StreamWriter(filename)) - { - var writer = new IndentedTextWriter(sw, " "); - foreach (var kvp in mapping) - { - if (info.IsOfType(kvp.Key)) - { - writer.WriteLine("[Obsolete]"); - kvp.Value.Generate(writer, _modulePrefix, _regenerate, info, - _moduleId ?? 
Guid.NewGuid().ToString(), _moduleName, _moduleOwner, _moduleVersion, _moduleState, - _moduleType, _moduleDeterminism, _moduleCategory, _exclude, _namespaces); - break; - } - } - } - } - } - - internal static class GeneratorUtils - { - public static bool IsOfType(this ComponentCatalog.LoadableClassInfo component, Type type) - { - return component.SignatureTypes != null && component.SignatureTypes.Contains(type); - } - } -} diff --git a/src/Microsoft.ML.Legacy/Runtime/EntryPoints/CodeGen/TransformGenerators.cs b/src/Microsoft.ML.Legacy/Runtime/EntryPoints/CodeGen/TransformGenerators.cs deleted file mode 100644 index fb1c40aa16..0000000000 --- a/src/Microsoft.ML.Legacy/Runtime/EntryPoints/CodeGen/TransformGenerators.cs +++ /dev/null @@ -1,403 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using System.CodeDom.Compiler; -using System.Collections.Generic; -using System.IO; -using System.Linq; -using System.Text; -using Microsoft.ML.CommandLine; -using Microsoft.ML.Internal.Utilities; - -namespace Microsoft.ML.EntryPoints.CodeGen -{ - internal sealed class TransformImplGenerator : ImplGeneratorBase - { - protected override void GenerateMethodSignature(IndentedTextWriter w, string prefix, ComponentCatalog.LoadableClassInfo component) - { - w.WriteLine("/// "); - w.WriteLine("/// Creates a {0}", component.LoadNames[0]); - w.WriteLine("/// "); - w.WriteLine("/// The environment"); - w.WriteLine("/// The data set"); - w.WriteLine("/// The transformed data."); - w.WriteLine("[Obsolete]"); - w.WriteLine("public IDataView Create{0}{1}Impl(", prefix, component.LoadNames[0]); - using (w.Nest()) - { - w.WriteLine("IHostEnvironment env,"); - w.WriteLine("IDataView data)"); - } - } - - protected override void GenerateImplBody(IndentedTextWriter w, ComponentCatalog.LoadableClassInfo component) - { - w.WriteLine("{"); - using (w.Nest()) - { - if (component.ArgType == null) - { - var call = GenerateCall(component); - w.WriteLine("return {0}(env, data);", call); - } - else - { - w.WriteLine("var args = new {0}();", GetCSharpTypeName(component.ArgType)); - var argumentInfo = CmdParser.GetArgInfo(component.ArgType, component.CreateArguments()); - foreach (var arg in argumentInfo.Args.Where(a => !a.IsHidden)) - GenerateImplBody(w, arg, ""); - var call = GenerateCall(component); - w.WriteLine("return {0}(args, env, data);", call); - } - } - w.WriteLine("}"); - } - - private string GenerateCall(ComponentCatalog.LoadableClassInfo component) - { - // The caller needs to ensure that the component has either a constructor or a create method. - Contracts.Assert(component.Constructor != null || component.CreateMethod != null); - string call; - if (component.Constructor != null) - { - var type = GetCSharpTypeName(component.Constructor.DeclaringType); - call = string.Format("new {0}", type); - } - else - { - var type = GetCSharpTypeName(component.CreateMethod.DeclaringType); - var name = component.CreateMethod.Name; - call = string.Format("{0}.{1}", type, name); - } - return call; - } - } - - internal sealed class TransformEntryPointGenerator : EntryPointGeneratorBase - { - protected override void GenerateSummaryComment(IndentedTextWriter w, ComponentCatalog.LoadableClassInfo component) - { - w.WriteLine("/// "); - var desc = component.Summary ?? 
component.LoadNames[0]; - using (var sr = new StringReader(desc)) - { - string line; - while ((line = sr.ReadLine()) != null) - w.WriteLine("/// {0}", line); - } - w.WriteLine("/// "); - GenerateParameterComment(w, "data", "The data"); - var argumentInfo = CmdParser.GetArgInfo(component.ArgType, component.CreateArguments()); - foreach (var arg in argumentInfo.Args.Where(a => !a.IsHidden)) - GenerateSummaryComment(w, arg, ""); - } - - protected override void GenerateReturnComment(IndentedTextWriter w) - { - w.WriteLine("/// A Tuple of transformed data and trained transform."); - } - - protected override void GenerateModuleAttribute(IndentedTextWriter w, string prefix, - ComponentCatalog.LoadableClassInfo component, string moduleId) - { - if (!string.IsNullOrEmpty(prefix)) - prefix += " "; - w.WriteLine("[DataLabModule(FriendlyName = \"{0}{1}\",", prefix, component.UserName); - using (w.Nest()) - { - var desc = component.Summary ?? component.LoadNames[0]; - w.WriteLine("Description = \"{0}\",", desc.Replace("\n", "\\n").Replace("\r", "\\r")); - w.WriteLine("IsBlocking = true,"); - w.WriteLine("IsDeterministic = true,"); - w.WriteLine("Version = \"2.0\","); - w.WriteLine("Owner = \"Microsoft Corporation\","); - w.WriteLine("FamilyId = \"{{{0}}}\",", moduleId.ToUpperInvariant()); - w.WriteLine("ReleaseState = States.Alpha)]"); - } - } - - protected override void GenerateOutputPort(IndentedTextWriter w) - { - w.WriteLine( - "[DataLabOutputPort(FriendlyName = \"Transformed IDataView\", DisplayName = \"Transformed IDataView\", Position = 0, DataType = WellKnownDataTypeIds.IDataViewDotNet, Description = \"Transformed data (IDataView)\")]"); - w.WriteLine( - "[DataLabOutputPort(FriendlyName = \"Transformed data model\", DisplayName = \"Transformed data model\", Position = 1, DataType = WellKnownDataTypeIds.ITransformDotNet, Description = \"Transformed data model (ITransform)\")]"); - } - - protected override void GenerateMethodSignature(IndentedTextWriter w, string prefix, - ComponentCatalog.LoadableClassInfo component) - { - w.WriteLine("[Obsolete]"); - w.WriteLine("public static Tuple Create{0}{1}(", prefix, component.LoadNames[0]); - using (w.Nest()) - { - var argumentInfo = CmdParser.GetArgInfo(component.ArgType, component.CreateArguments()); - w.WriteLine("[DataLabInputPort(FriendlyName = \"IDataView\", DisplayName = \"IDataView\", IsOptional = false, DataTypes = WellKnownDataTypeIds.IDataViewDotNet, Description = \"Input data (IDataView)\")]"); - w.Write("IDataView data"); - var pre = ","; - foreach (var arg in argumentInfo.Args.Where(a => !a.IsHidden)) - GenerateMethodSignature(w, arg, null, null, null, ref pre, ""); - w.WriteLine(")"); - } - } - - protected override void GenerateImplCall(IndentedTextWriter w, string prefix, ComponentCatalog.LoadableClassInfo component) - { - w.WriteLine("{"); - using (w.Nest()) - { - var className = prefix + component.LoadNames[0]; - w.WriteLine("var builder = new {0}();", className); - var argumentInfo = CmdParser.GetArgInfo(component.ArgType, component.CreateArguments()); - foreach (var arg in argumentInfo.Args.Where(a => !a.IsHidden)) - GenerateImplCall(w, arg, ""); - w.WriteLine("var env = new LocalEnvironment(1, verbose: true);"); - w.WriteLine("var view = builder.Create{0}{1}Impl(env, data);", prefix, component.LoadNames[0]); - w.WriteLine("return new Tuple(view, new DataTransform(view));"); - } - w.WriteLine("}"); - } - } - - internal sealed class TransformModuleInstanceEntryPointGenerator : GeneratorBase - { - private string _compName; - - protected 
override void GenerateUsings(IndentedTextWriter w) - { - var allNamespaces = new HashSet(); - foreach (var ns in Namespaces) - allNamespaces.Add(ns); - allNamespaces.Add("System.Collections.Generic"); - allNamespaces.Add("Microsoft.Analytics.Modules.Common"); - allNamespaces.Add("Microsoft.Analytics.Platform.ML.Models"); - allNamespaces.Add("Microsoft.ML.Data"); - allNamespaces.Add("Microsoft.ML.Modules.Contracts"); - allNamespaces.Add("Microsoft.ML.Modules.Contracts.Attributes"); - allNamespaces.Add("Microsoft.ML.Modules.Contracts.Types"); - var namespaces = allNamespaces.ToArray(); - Array.Sort(namespaces, - (a, b) => - a.StartsWith("System") && !b.StartsWith("System") ? -1 - : !a.StartsWith("System") && b.StartsWith("System") ? 1 - : string.CompareOrdinal(a, b)); - foreach (var ns in namespaces) - w.WriteLine("using {0};", ns); - } - - protected override void GenerateClassName(IndentedTextWriter w, string prefix, ComponentCatalog.LoadableClassInfo component) - { - w.WriteLine(); - var className = prefix + component.LoadNames[0]; - w.WriteLine("/// Module: {0}", className); - w.WriteLine("[Obsolete]"); - w.WriteLine("public static class {0}EntryPoint", className); - w.WriteLine("{"); - } - - protected override void GenerateContent(IndentedTextWriter writer, string prefix, - ComponentCatalog.LoadableClassInfo component, string moduleId) - { - writer.WriteLine("[Module("); - _compName = prefix + component.LoadNames[0]; - var name = Name ?? PrettyPrintDisplayName(component.LoadNames[0]); - using (writer.Nest()) - { - writer.WriteLine("Name = \"{0}\",", name); - writer.WriteLine("FamilyId = \"{0}\",", moduleId); - writer.WriteLine("Owner = \"{0}\",", Owner); - writer.WriteLine("ReleaseVersion = \"{0}\",", Version); - writer.WriteLine("State = ModuleState.{0},", State); - writer.WriteLine("Type = ModuleType.{0},", ModuleType); - writer.WriteLine("Determinism = Determinism.{0},", Determinism); - writer.WriteLine("Category = @\"{0}\")]", Category); - } - writer.WriteLine("[Obsolete]"); - writer.WriteLine("public static IModule Create{0}(", _compName); - using (writer.Nest()) - { - writer.WriteLine("[Help(Display = @\"Dataset\", ToolTip = @\"Input dataset\")]"); - writer.WriteLine("[ModuleInputPort]"); - writer.WriteLine("IDataView idataset,"); - var argumentInfo = CmdParser.GetArgInfo(component.ArgType, component.CreateArguments()); - foreach (var arg in argumentInfo.Args.Where(a => !a.IsHidden)) - GenerateMethodSignature(writer, arg, null, null, null, ""); - writer.WriteLine("[Help(Display = @\"Results dataset\", ToolTip = @\"Transformed dataset\")]"); - writer.WriteLine("[ModuleOutputPort]"); - writer.WriteLine("IDataView odataset,"); - writer.WriteLine("[Help(Display = @\"{0}\", ToolTip = @\"{0}\")]", name); - writer.WriteLine("[ModuleOutputPort]"); - writer.WriteLine("DataTransform otransform,"); - writer.WriteLine("[Context]"); - writer.WriteLine("IContext context)"); - } - writer.WriteLine("{"); - using (writer.Nest()) - { - writer.WriteLine("var instance = new {0}Module();", _compName); - writer.WriteLine(); - writer.WriteLine("var ports = new Dictionary { { \"idataset\", idataset } };"); - writer.WriteLine("var parameters = new Dictionary"); - writer.WriteLine("{"); - using (writer.Nest()) - { - var argumentInfo = CmdParser.GetArgInfo(component.ArgType, component.CreateArguments()); - foreach (var arg in argumentInfo.Args.Where(a => !a.IsHidden)) - GenerateDictionaryEntry(writer, arg, ""); - } - writer.WriteLine("};"); - writer.WriteLine(); - writer.WriteLine("instance.Context = 
context;"); - writer.WriteLine("instance.SetInputPorts(ports);"); - writer.WriteLine("instance.SetParameters(parameters);"); - writer.WriteLine(); - writer.WriteLine("return instance;"); - } - writer.WriteLine("}"); - writer.WriteLine(); - writer.WriteLine("[Obsolete]"); - writer.WriteLine("public class {0}Module : ModuleBase", _compName); - writer.WriteLine("{"); - using (writer.Nest()) - { - writer.WriteLine("private Dictionary parameters;"); - writer.WriteLine("private Dictionary ports;"); - writer.WriteLine(); - writer.WriteLine("public override Dictionary Run()"); - writer.WriteLine("{"); - using (writer.Nest()) - { - writer.WriteLine("var view = ConstructTransform((IDataView)ports[\"idataset\"]);"); - writer.WriteLine("return new Dictionary { { \"odataset\", view }, { \"otransform\", new DataTransform(view) } };"); - } - writer.WriteLine("}"); - writer.WriteLine(); - writer.WriteLine("public override void SetParameters(Dictionary parameters)"); - writer.WriteLine("{"); - using (writer.Nest()) - writer.WriteLine("this.parameters = parameters;"); - writer.WriteLine("}"); - writer.WriteLine(); - writer.WriteLine("public override void SetInputPorts(Dictionary ports)"); - writer.WriteLine("{"); - using (writer.Nest()) - writer.WriteLine("this.ports = ports;"); - writer.WriteLine("}"); - writer.WriteLine(); - writer.WriteLine("public override Dictionary ComputeSchema(Dictionary inputports)"); - writer.WriteLine("{"); - using (writer.Nest()) - { - writer.WriteLine("var view = ConstructTransform((IDataView)inputports[\"idataset\"]);"); - writer.WriteLine("return new Dictionary { { \"odataset\", view.Schema } };"); - } - writer.WriteLine("}"); - writer.WriteLine(); - writer.WriteLine("private IDataView ConstructTransform(IDataView input)"); - writer.WriteLine("{"); - using (writer.Nest()) - { - writer.WriteLine("var builder = new {0}();", _compName); - var argumentInfo = CmdParser.GetArgInfo(component.ArgType, component.CreateArguments()); - foreach (var arg in argumentInfo.Args.Where(a => !a.IsHidden)) - GenerateImplCall(writer, arg, null, null, null, ""); - writer.WriteLine("return builder.Create{0}Impl(Host, input);", _compName); - } - writer.WriteLine("}"); - } - writer.WriteLine("}"); - } - - protected override string EnumName(CmdParser.ArgInfo.Arg arg, Type sigType) - { - return _compName + "." 
+ base.EnumName(arg, sigType); - } - - private void GenerateMethodSignature(IndentedTextWriter w, CmdParser.ArgInfo.Arg arg, string parent, string parentType, string parentValue, string argSuffix) - { - if (Exclude.Contains(arg.LongName)) - return; - - if (IsColumnType(arg)) - { - GenerateParameterAttribute(w, arg.LongName, null, arg.HelpText, parent, parentType, parentValue); - GenerateParameter(w, "string", arg.LongName + argSuffix); - } - else - { - GenerateParameterAttribute(w, arg.LongName, Stringify(arg.DefaultValue), arg.HelpText, parent, parentType, parentValue); - GenerateParameter(w, GetCSharpTypeName(arg.ItemType), arg.LongName + argSuffix); - } - } - - private void GenerateDictionaryEntry(IndentedTextWriter w, CmdParser.ArgInfo.Arg arg, string argSuffix) - { - if (Exclude.Contains(arg.LongName)) - return; - - if (IsColumnType(arg)) - GenerateDictionaryEntry(w, "string", arg.LongName + argSuffix); - else - GenerateDictionaryEntry(w, GetCSharpTypeName(arg.ItemType), arg.LongName + argSuffix); - } - - private void GenerateDictionaryEntry(IndentedTextWriter w, string type, string name) - { - w.WriteLine("{{ \"{0}\", {0} }},", name); - } - - private void GenerateImplCall(IndentedTextWriter w, CmdParser.ArgInfo.Arg arg, string parent, string parentType, string parentValue, string argSuffix) - { - if (Exclude.Contains(arg.LongName)) - return; - - if (IsColumnType(arg) || IsStringColumnType(arg)) - { - string name = arg.LongName + argSuffix; - if (arg.IsCollection) - w.WriteLine("builder.{0} = ((string)parameters[\"{1}\"]).Split('|');", Capitalize(name), name); - else - w.WriteLine("builder.{0} = (string)parameters[\"{1}\"];", Capitalize(name), name); - } - else - GenerateImplCall(w, GetCSharpTypeName(arg.ItemType), arg.LongName + argSuffix); - } - - private void GenerateImplCall(IndentedTextWriter w, string type, string name) - { - w.WriteLine("builder.{0} = ({1})parameters[\"{2}\"];", Capitalize(name), type, name); - } - - protected override void GenerateParameter(IndentedTextWriter w, string type, string name) - { - w.WriteLine("{0} {1},", type, name); - } - - private void GenerateParameterAttribute(IndentedTextWriter w, string displayName, object defaultValue, string description, - string parent = null, string parentType = null, string parentValue = null) - { - w.WriteLine("[Help(Display = @\"{0}\", ToolTip = \"{1}\")]", PrettyPrintDisplayName(displayName), description); - if (parent != null) - w.WriteLine("[Relevancy(Key = \"{0}\", Values = new object[] {{ {1}.{2} }})]", parent, parentType, parentValue); - if (defaultValue != null) - w.WriteLine("[Domain(DefaultValue = {0})]", defaultValue); - w.WriteLine("[ModuleParameter]"); - } - - private string PrettyPrintDisplayName(string displayName) - { - var sb = new StringBuilder(); - bool first = true; - foreach (var c in Capitalize(displayName)) - { - if (!first && c >= 'A' && c <= 'Z') - sb.Append(' '); - first = false; - sb.Append(c); - } - return sb.ToString(); - } - } -} \ No newline at end of file diff --git a/src/Microsoft.ML.Legacy/Runtime/EntryPoints/ImportTextData.cs b/src/Microsoft.ML.Legacy/Runtime/EntryPoints/ImportTextData.cs deleted file mode 100644 index bf4faadbf2..0000000000 --- a/src/Microsoft.ML.Legacy/Runtime/EntryPoints/ImportTextData.cs +++ /dev/null @@ -1,43 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. 
- -using Microsoft.ML.CommandLine; -using Microsoft.ML.Data; -using Microsoft.ML.EntryPoints; - -[assembly: EntryPointModule(typeof(Microsoft.ML.Legacy.EntryPoints.ImportTextData))] - -// The warning #612 is disabled because the following code uses legacy TextLoader. -// Because that dependency will be removed form ML.NET, one needs to rewrite all places where legacy APIs are used. -#pragma warning disable 612 -namespace Microsoft.ML.Legacy.EntryPoints -{ - /// - /// A component for importing text files as . - /// - public static class ImportTextData - { - [TlcModule.EntryPointKind(typeof(ILearningPipelineLoader))] - public sealed class LoaderInput - { - [Argument(ArgumentType.Required, ShortName = "data", HelpText = "Location of the input file", SortOrder = 1)] - public IFileHandle InputFile; - - [Argument(ArgumentType.Required, ShortName = "args", HelpText = "Arguments", SortOrder = 2)] - public TextLoader.Arguments Arguments = new TextLoader.Arguments(); - } - - [TlcModule.EntryPoint(Name = "Data.TextLoader", Desc = "Import a dataset from a text file")] - public static ML.EntryPoints.ImportTextData.Output TextLoader(IHostEnvironment env, LoaderInput input) - { - Contracts.CheckValue(env, nameof(env)); - var host = env.Register("ImportTextData"); - env.CheckValue(input, nameof(input)); - EntryPointUtils.CheckInputArgs(host, input); - var loader = host.CreateLoader(input.Arguments, new FileHandleSource(input.InputFile)); - return new ML.EntryPoints.ImportTextData.Output { Data = loader }; - } - } -} -#pragma warning restore 612 diff --git a/src/Microsoft.ML.Legacy/Runtime/Experiment/Experiment.cs b/src/Microsoft.ML.Legacy/Runtime/Experiment/Experiment.cs deleted file mode 100644 index b83112cc9b..0000000000 --- a/src/Microsoft.ML.Legacy/Runtime/Experiment/Experiment.cs +++ /dev/null @@ -1,330 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using System.Collections.Generic; -using System.IO; -using Microsoft.ML.EntryPoints; -using Microsoft.ML.EntryPoints.JsonUtils; -using Newtonsoft.Json; -using Newtonsoft.Json.Converters; -using Newtonsoft.Json.Linq; - -namespace Microsoft.ML -{ - /// - /// This class represents an entry point graph. - /// The nodes in the graph represent entry point calls and - /// the edges of the graph are variables that help connect the nodes. - /// - [JsonConverter(typeof(ExperimentSerializer))] - public sealed partial class Experiment - { - private sealed class SerializationHelper - { - public string Name { get; set; } - public object Inputs { get; set; } - public object Outputs { get; set; } - } - - private readonly IHostEnvironment _env; - private readonly ComponentCatalog _catalog; - private readonly List _jsonNodes; - private readonly JsonSerializer _serializer; - private readonly SerializationHelper _helper; - private EntryPointGraph _graph; - public Experiment(IHostEnvironment env) - { - _env = env; - AssemblyRegistration.RegisterAssemblies(_env); - - _catalog = _env.ComponentCatalog; - _jsonNodes = new List(); - _serializer = new JsonSerializer(); - _serializer.Converters.Add(new StringEnumConverter()); - _helper = new SerializationHelper(); - } - - /// - /// Parses the nodes to determine the validity of the graph and - /// to determine the inputs and outputs of the graph. 
- /// - public void Compile() - { - _env.Check(_graph == null, "Multiple calls to " + nameof(Compile) + "() detected."); - var nodes = GetNodes(); - _graph = new EntryPointGraph(_env, nodes); - } - - public JArray GetNodes() - { - JObject json; - try - { - json = JObject.Parse($"{{'nodes': [{string.Join(",", _jsonNodes)}]}}"); - } - catch (JsonReaderException ex) - { - throw _env.Except(ex, "Failed to parse experiment graph: {0}", ex.Message); - } - - return json["nodes"] as JArray; - } - - public void SetInput(string varName, TInput input) - where TInput : class - { - _env.CheckNonEmpty(varName, nameof(varName)); - _env.CheckValue(input, nameof(input)); - - EntryPointVariable entryPointVariable = _graph.GetVariableOrNull(varName); - - if (entryPointVariable == null) - throw _env.Except("Port '{0}' not found", varName); - if (entryPointVariable.HasOutputs) - throw _env.Except("Port '{0}' is not an input", varName); - if (entryPointVariable.Value != null) - throw _env.Except("Port '{0}' is already set", varName); - if (!entryPointVariable.Type.IsAssignableFrom(typeof(TInput))) - throw _env.Except("Port '{0}' is of incorrect type", varName); - - entryPointVariable.SetValue(input); - } - - public void SetInput(Var variable, TInput input) - where TInput : class - { - _env.CheckValue(variable, nameof(variable)); - var varName = variable.VarName; - SetInput(varName, input); - } - - public void SetInput(ArrayVar variable, TInput2[] input) - where TInput : class - { - _env.CheckValue(variable, nameof(variable)); - var varName = variable.VarName; - _env.CheckNonEmpty(varName, nameof(variable.VarName)); - _env.CheckValue(input, nameof(input)); - if (!typeof(TInput).IsAssignableFrom(typeof(TInput2))) - throw _env.ExceptUserArg(nameof(input), $"Type {typeof(TInput2)} not castable to type {typeof(TInput)}"); - - EntryPointVariable entryPointVariable = _graph.GetVariableOrNull(varName); - - if (entryPointVariable == null) - throw _env.Except("Port '{0}' not found", varName); - if (entryPointVariable.HasOutputs) - throw _env.Except("Port '{0}' is not an input", varName); - if (entryPointVariable.Value != null) - throw _env.Except("Port '{0}' is already set", varName); - if (!entryPointVariable.Type.IsAssignableFrom(typeof(TInput[]))) - throw _env.Except("Port '{0}' is of incorrect type", varName); - - entryPointVariable.SetValue(input); - } - - public void Run() - { - var graphRunner = new GraphRunner(_env, _graph); - graphRunner.RunAll(); - } - - public TOutput GetOutput(Var var) - where TOutput : class - { - _env.CheckValue(var, nameof(var)); - _env.CheckNonEmpty(var.VarName, nameof(var.VarName)); - var varBinding = VariableBinding.Create(_env, $"${var.VarName}"); - EntryPointVariable variable = _graph.GetVariableOrNull(varBinding.VariableName); - - if (variable == null) - throw _env.Except("Port '{0}' not found", var.VarName); - var value = varBinding.GetVariableValueOrNull(variable); - if (value == null) - return null; - - var result = value as TOutput; - if (result == null) - throw _env.Except("Incorrect type for output '{0}'", var.VarName); - return result; - } - - public void Reset() - { - _graph = null; - _jsonNodes.Clear(); - } - - private string Serialize(string name, object input, object output) - { - _helper.Name = name; - _helper.Inputs = input; - _helper.Outputs = output; - using (var sw = new StringWriter()) - { - using (var jw = new JsonTextWriter(sw)) - { - jw.Formatting = Newtonsoft.Json.Formatting.Indented; - _serializer.Serialize(jw, _helper); - } - return sw.ToString(); - } - } - - 
private string GetEntryPointName(Type inputType) - { - if (inputType.FullName != null) - { - int dotCounts = 0; - string fullName = inputType.FullName; - for (int i = fullName.Length - 1; i >= 0; i--) - { - if (fullName[i] == '.') - dotCounts++; - if(dotCounts == 2) - { - return fullName.Substring(i + 1); - } - } - - Contracts.Assert(dotCounts == 1); - - return fullName; - } - - return null; - } - - public EntryPointTransformOutput Add(CommonInputs.ITransformInput input) - { - var output = new EntryPointTransformOutput(); - Add(input, output); - return output; - } - - internal void Add(CommonInputs.ITransformInput input, CommonOutputs.ITransformOutput output) - { - _jsonNodes.Add(Serialize(GetEntryPointName(input.GetType()), input, output)); - } - - public EntryPointTrainerOutput Add(CommonInputs.ITrainerInput input) - { - var output = new EntryPointTrainerOutput(); - Add(input, output); - return output; - } - - internal void Add(CommonInputs.ITrainerInput input, CommonOutputs.ITrainerOutput output) - { - _jsonNodes.Add(Serialize(GetEntryPointName(input.GetType()), input, output)); - } - - public CommonOutputs.IEvaluatorOutput Add(CommonInputs.IEvaluatorInput input, CommonOutputs.IEvaluatorOutput output) - { - _jsonNodes.Add(Serialize(GetEntryPointName(input.GetType()), input, output)); - return output; - } - - public string ToJsonString() => String.Join(",", _jsonNodes); - } - - public sealed class ComponentSerializer : JsonConverter - { - private class Helper - { - public string Name { get; set; } - public object Settings { get; set; } - } - - public override void WriteJson(JsonWriter writer, object value, JsonSerializer serializer) - { - Contracts.Assert(value is ComponentKind); - var componentKind = (ComponentKind)value; - var helper = new Helper(); - helper.Name = componentKind.ComponentName; - helper.Settings = componentKind; - serializer.ReferenceLoopHandling = ReferenceLoopHandling.Serialize; - serializer.Serialize(writer, helper); - } - - public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer) - { - throw Contracts.ExceptNotImpl("Parsing JSON for Component not needed for the C# API."); - } - - public override bool CanConvert(Type objectType) => typeof(ComponentKind).IsAssignableFrom(objectType); - - public override bool CanRead => false; - } - - public sealed class ExperimentSerializer : JsonConverter - { - public override void WriteJson(JsonWriter writer, object value, JsonSerializer serializer) - { - Contracts.Assert(value is Experiment); - var subGraph = (Experiment)value; - var nodes = subGraph.GetNodes(); - nodes.WriteTo(writer); - } - - public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer) - { - throw Contracts.ExceptNotImpl("Parsing JSON for Experiment not needed for the C# API."); - } - - public override bool CanConvert(Type objectType) => typeof(Experiment).IsAssignableFrom(objectType); - - public override bool CanRead => false; - } - - public abstract class ComponentKind - { - internal ComponentKind() { } - - [JsonIgnore] - internal abstract string ComponentName { get; } - } - - public static class ExperimentUtils - { - public static Experiment CreateExperiment(this IHostEnvironment env) - { - return new Experiment(env); - } - - public static string GenerateOverallMetricVarName(Guid id) => $"Var_OM_{id:N}"; - } - - public sealed class EntryPointTransformOutput : CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var 
OutputData { get; set; } - - /// - /// Transform model - /// - public Var Model { get; set; } - - public EntryPointTransformOutput() - { - OutputData = new Var(); - Model = new Var(); - } - } - - public sealed class EntryPointTrainerOutput : CommonOutputs.ITrainerOutput - { - /// - /// The trained model - /// - public Var PredictorModel { get; set; } - - public EntryPointTrainerOutput() - { - PredictorModel = new Var(); - } - } -} diff --git a/src/Microsoft.ML.Legacy/Runtime/Internal/Tools/CSharpApiGenerator.cs b/src/Microsoft.ML.Legacy/Runtime/Internal/Tools/CSharpApiGenerator.cs deleted file mode 100644 index 91bdd46f21..0000000000 --- a/src/Microsoft.ML.Legacy/Runtime/Internal/Tools/CSharpApiGenerator.cs +++ /dev/null @@ -1,618 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using System.CodeDom.Compiler; -using System.Collections.Generic; -using System.IO; -using System.Linq; -using System.Reflection; -using Microsoft.ML; -using Microsoft.ML.CommandLine; -using Microsoft.ML.Data; -using Microsoft.ML.EntryPoints; -using Microsoft.ML.Internal.Tools; -using Microsoft.ML.Internal.Utilities; -using Microsoft.ML.Tools; -using Newtonsoft.Json.Linq; -using static Microsoft.ML.EntryPoints.CommonInputs; - -[assembly: LoadableClass(typeof(CSharpApiGenerator), typeof(CSharpApiGenerator.Arguments), typeof(SignatureModuleGenerator), - "CSharp API generator", "CSGenerator", "CS")] - -#pragma warning disable 612 -namespace Microsoft.ML.Internal.Tools -{ - internal sealed class CSharpApiGenerator : IGenerator - { - public sealed class Arguments - { - [Argument(ArgumentType.AtMostOnce, IsInputFileName = true, HelpText = "The path of the generated C# file")] - public string CsFilename; - - [Argument(ArgumentType.Multiple, HelpText = "Entry points to exclude", ShortName = "excl")] - public string[] Exclude; - } - - private readonly IHost _host; - private readonly string _csFilename; - private readonly string _regenerate; - private readonly HashSet _excludedSet; - private const string RegistrationName = "CSharpApiGenerator"; - private const string _defaultNamespace = "Microsoft.ML.Legacy."; - private readonly GeneratedClasses _generatedClasses; - - public CSharpApiGenerator(IHostEnvironment env, Arguments args, string regenerate) - { - Contracts.CheckValue(env, nameof(env)); - _host = env.Register(RegistrationName); - _host.AssertValue(args, nameof(args)); - _host.AssertNonEmpty(regenerate, nameof(regenerate)); - Utils.CheckOptionalUserDirectory(args.CsFilename, nameof(args.CsFilename)); - - _csFilename = args.CsFilename; - if (string.IsNullOrWhiteSpace(_csFilename)) - _csFilename = "CSharpApi.cs"; - _regenerate = regenerate; - _excludedSet = new HashSet(args.Exclude); - _generatedClasses = new GeneratedClasses(); - } - - public void Generate(IEnumerable infos) - { - var catalog = _host.ComponentCatalog; - - using (var sw = new StreamWriter(_csFilename)) - { - var writer = new IndentedTextWriter(sw, " "); - - // Generate header - CSharpGeneratorUtils.GenerateHeader(writer); - - foreach (var entryPointInfo in catalog.AllEntryPoints().Where(x => !_excludedSet.Contains(x.Name)).OrderBy(x => x.Name)) - { - // Generate method - CSharpGeneratorUtils.GenerateMethod(writer, entryPointInfo.Name, _defaultNamespace); - } - - // Generate footer - CSharpGeneratorUtils.GenerateFooter(writer); - - foreach (var entryPointInfo in 
catalog.AllEntryPoints().Where(x => !_excludedSet.Contains(x.Name)).OrderBy(x => x.Name)) - { - // Generate input and output classes - GenerateInputOutput(writer, entryPointInfo, catalog); - } - - foreach (var kind in catalog.GetAllComponentKinds()) - { - // Generate kind base class - GenerateComponentKind(writer, kind); - - foreach (var component in catalog.GetAllComponents(kind)) - { - // Generate component - GenerateComponent(writer, component, catalog); - } - } - - CSharpGeneratorUtils.GenerateFooter(writer); - writer.WriteLine("#pragma warning restore"); - } - } - - private void GenerateInputOutput(IndentedTextWriter writer, ComponentCatalog.EntryPointInfo entryPointInfo, ComponentCatalog catalog) - { - var classAndMethod = CSharpGeneratorUtils.GetEntryPointMetadata(entryPointInfo); - writer.WriteLine($"namespace Legacy.{classAndMethod.Namespace}"); - writer.WriteLine("{"); - writer.Indent(); - GenerateInput(writer, entryPointInfo, catalog); - writer.Outdent(); - writer.WriteLine("}"); - writer.WriteLineNoTabs(); - } - - private void GenerateEnums(IndentedTextWriter writer, Type inputType, string currentNamespace) - { - foreach (var fieldInfo in inputType.GetFields()) - { - var inputAttr = fieldInfo.GetCustomAttributes(typeof(ArgumentAttribute), false).FirstOrDefault() as ArgumentAttribute; - if (inputAttr == null || inputAttr.Visibility == ArgumentAttribute.VisibilityType.CmdLineOnly) - continue; - var type = CSharpGeneratorUtils.ExtractOptionalOrNullableType(fieldInfo.FieldType); - if (_generatedClasses.IsGenerated(type.FullName)) - continue; - - if (!type.IsEnum) - { - var typeEnum = TlcModule.GetDataType(type); - if (typeEnum == TlcModule.DataKind.Unknown) - GenerateEnums(writer, type, currentNamespace); - continue; - } - - var enumType = Enum.GetUnderlyingType(type); - - writer.WriteLine("[Obsolete]"); - var apiName = _generatedClasses.GetApiName(type, currentNamespace); - if (enumType == typeof(int)) - writer.WriteLine($"public enum {apiName}"); - else - { - Contracts.Assert(enumType == typeof(byte)); - writer.WriteLine($"public enum {apiName} : byte"); - } - - _generatedClasses.MarkAsGenerated(type.FullName); - writer.WriteLine("{"); - writer.Indent(); - var names = Enum.GetNames(type); - var values = Enum.GetValues(type); - var lines = new List(); - for (int i = 0; i < names.Length; i++) - { - var name = names[i]; - if (type.GetField(name).GetCustomAttribute() != null) - continue; - var value = values.GetValue(i); - if (enumType == typeof(int)) - lines.Add($"{name} = {(int)value}"); - else - { - Contracts.Assert(enumType == typeof(byte)); - lines.Add($"{name} = {(byte)value}"); - } - } - for (int i = 0; i < lines.Count - 1; i++) - { - writer.WriteLine($"{lines[i]},"); - } - writer.WriteLine($"{lines[lines.Count-1]}"); - writer.Outdent(); - writer.WriteLine("}"); - writer.WriteLineNoTabs(); - } - } - - private void GenerateClasses(IndentedTextWriter writer, Type inputType, ComponentCatalog catalog, string currentNamespace) - { - foreach (var fieldInfo in inputType.GetFields()) - { - var inputAttr = fieldInfo.GetCustomAttributes(typeof(ArgumentAttribute), false).FirstOrDefault() as ArgumentAttribute; - if (inputAttr == null || inputAttr.Visibility == ArgumentAttribute.VisibilityType.CmdLineOnly) - continue; - - var type = fieldInfo.FieldType; - type = CSharpGeneratorUtils.ExtractOptionalOrNullableType(type); - if (type.IsArray) - type = type.GetElementType(); - if (type == typeof(JArray) || type == typeof(JObject)) - continue; - if (type.IsGenericType && 
type.GetGenericTypeDefinition() == typeof(Var<>)) - continue; - if (type == typeof(CommonInputs.IEvaluatorInput)) - continue; - if (type == typeof(CommonOutputs.IEvaluatorOutput)) - continue; - var typeEnum = TlcModule.GetDataType(type); - if (typeEnum != TlcModule.DataKind.Unknown) - continue; - - if (_generatedClasses.IsGenerated(type.FullName)) - continue; - GenerateEnums(writer, type, currentNamespace); - GenerateClasses(writer, type, catalog, currentNamespace); - - var apiName = _generatedClasses.GetApiName(type, currentNamespace); - string classBase = ""; - if (type.IsSubclassOf(typeof(OneToOneColumn))) - classBase = $" : OneToOneColumn<{apiName}>, IOneToOneColumn"; - else if (type.IsSubclassOf(typeof(ManyToOneColumn))) - classBase = $" : ManyToOneColumn<{apiName}>, IManyToOneColumn"; - writer.WriteLine("[Obsolete]"); - writer.WriteLine($"public sealed partial class {apiName}{classBase}"); - writer.WriteLine("{"); - writer.Indent(); - _generatedClasses.MarkAsGenerated(type.FullName); - GenerateInputFields(writer, type, catalog, currentNamespace); - writer.Outdent(); - writer.WriteLine("}"); - writer.WriteLineNoTabs(); - } - } - - private void GenerateColumnAddMethods(IndentedTextWriter writer, Type inputType, ComponentCatalog catalog, - string className, out Type columnType) - { - columnType = null; - foreach (var fieldInfo in inputType.GetFields()) - { - var inputAttr = fieldInfo.GetCustomAttributes(typeof(ArgumentAttribute), false).FirstOrDefault() as ArgumentAttribute; - if (inputAttr == null || inputAttr.Visibility == ArgumentAttribute.VisibilityType.CmdLineOnly) - continue; - var type = CSharpGeneratorUtils.ExtractOptionalOrNullableType(fieldInfo.FieldType); - var isArray = type.IsArray; - if (isArray) - type = type.GetElementType(); - if (type == typeof(JArray) || type == typeof(JObject)) - continue; - if (type.IsGenericType && type.GetGenericTypeDefinition() == typeof(Var<>)) - continue; - var typeEnum = TlcModule.GetDataType(type); - if (typeEnum != TlcModule.DataKind.Unknown) - continue; - - if (type.IsSubclassOf(typeof(OneToOneColumn))) - columnType = GenerateOneToOneColumn(writer, className, columnType, fieldInfo, inputAttr, type, isArray); - else if (type.IsSubclassOf(typeof(ManyToOneColumn))) - columnType = GenerateManyToOneColumn(writer, className, columnType, fieldInfo, inputAttr, type, isArray); - } - } - - private Type GenerateManyToOneColumn(IndentedTextWriter writer, string className, Type columnType, - System.Reflection.FieldInfo fieldInfo, ArgumentAttribute inputAttr, Type type, bool isArray) - { - var fieldName = CSharpGeneratorUtils.Capitalize(inputAttr.Name ?? fieldInfo.Name); - var apiName = _generatedClasses.GetApiName(type, ""); - writer.WriteLine($"public {className}()"); - writer.WriteLine("{"); - writer.WriteLine("}"); - writer.WriteLine(""); - writer.WriteLine($"public {className}(string output{fieldName}, params string[] input{fieldName}s)"); - writer.WriteLine("{"); - writer.Indent(); - writer.WriteLine($"Add{fieldName}(output{fieldName}, input{fieldName}s);"); - writer.Outdent(); - writer.WriteLine("}"); - writer.WriteLine(""); - writer.WriteLine($"public void Add{fieldName}(string name, params string[] source)"); - writer.WriteLine("{"); - writer.Indent(); - if (isArray) - { - writer.WriteLine($"var list = {fieldName} == null ? 
new List<{apiName}>() : new List<{apiName}>({fieldName});"); - writer.WriteLine($"list.Add(ManyToOneColumn<{apiName}>.Create(name, source));"); - writer.WriteLine($"{fieldName} = list.ToArray();"); - } - else - writer.WriteLine($"{fieldName} = ManyToOneColumn<{apiName}>.Create(name, source);"); - writer.Outdent(); - writer.WriteLine("}"); - writer.WriteLineNoTabs(); - - Contracts.Assert(columnType == null); - - columnType = type; - return columnType; - } - - private Type GenerateOneToOneColumn(IndentedTextWriter writer, string className, Type columnType, - System.Reflection.FieldInfo fieldInfo, ArgumentAttribute inputAttr, Type type, bool isArray) - { - var fieldName = CSharpGeneratorUtils.Capitalize(inputAttr.Name ?? fieldInfo.Name); - var generatedType = _generatedClasses.GetApiName(type, ""); - writer.WriteLine($"public {className}()"); - writer.WriteLine("{"); - writer.WriteLine("}"); - writer.WriteLine(""); - writer.WriteLine($"public {className}(params string[] input{fieldName}s)"); - writer.WriteLine("{"); - writer.Indent(); - writer.WriteLine($"if (input{fieldName}s != null)"); - writer.WriteLine("{"); - writer.Indent(); - writer.WriteLine($"foreach (string input in input{fieldName}s)"); - writer.WriteLine("{"); - writer.Indent(); - writer.WriteLine($"Add{fieldName}(input);"); - writer.Outdent(); - writer.WriteLine("}"); - writer.Outdent(); - writer.WriteLine("}"); - writer.Outdent(); - writer.WriteLine("}"); - writer.WriteLine(""); - writer.WriteLine($"public {className}(params (string inputColumn, string outputColumn)[] inputOutput{fieldName}s)"); - writer.WriteLine("{"); - writer.Indent(); - writer.WriteLine($"if (inputOutput{fieldName}s != null)"); - writer.WriteLine("{"); - writer.Indent(); - writer.WriteLine($"foreach (var inputOutput in inputOutput{fieldName}s)"); - writer.WriteLine("{"); - writer.Indent(); - writer.WriteLine($"Add{fieldName}(inputOutput.outputColumn, inputOutput.inputColumn);"); - writer.Outdent(); - writer.WriteLine("}"); - writer.Outdent(); - writer.WriteLine("}"); - writer.Outdent(); - writer.WriteLine("}"); - writer.WriteLine(""); - writer.WriteLine($"public void Add{fieldName}(string inputColumn)"); - writer.WriteLine("{"); - writer.Indent(); - if (isArray) - { - writer.WriteLine($"var list = {fieldName} == null ? new List<{generatedType}>() : new List<{generatedType}>({fieldName});"); - writer.WriteLine($"list.Add(OneToOneColumn<{generatedType}>.Create(inputColumn));"); - writer.WriteLine($"{fieldName} = list.ToArray();"); - } - else - writer.WriteLine($"{fieldName} = OneToOneColumn<{generatedType}>.Create(inputColumn);"); - writer.Outdent(); - writer.WriteLine("}"); - writer.WriteLineNoTabs(); - writer.WriteLine($"public void Add{fieldName}(string outputColumn, string inputColumn)"); - writer.WriteLine("{"); - writer.Indent(); - if (isArray) - { - writer.WriteLine($"var list = {fieldName} == null ? 
new List<{generatedType}>() : new List<{generatedType}>({fieldName});"); - writer.WriteLine($"list.Add(OneToOneColumn<{generatedType}>.Create(outputColumn, inputColumn));"); - writer.WriteLine($"{fieldName} = list.ToArray();"); - } - else - writer.WriteLine($"{fieldName} = OneToOneColumn<{generatedType}>.Create(outputColumn, inputColumn);"); - writer.Outdent(); - writer.WriteLine("}"); - writer.WriteLineNoTabs(); - - Contracts.Assert(columnType == null); - - columnType = type; - return columnType; - } - - private void GenerateInput(IndentedTextWriter writer, ComponentCatalog.EntryPointInfo entryPointInfo, ComponentCatalog catalog) - { - var entryPointMetadata = CSharpGeneratorUtils.GetEntryPointMetadata(entryPointInfo); - string classBase = ""; - if (entryPointInfo.InputKinds != null) - { - classBase += $" : {string.Join(", ", entryPointInfo.InputKinds.Select(CSharpGeneratorUtils.GetCSharpTypeName))}"; - if (entryPointInfo.InputKinds.Any(t => typeof(ITrainerInput).IsAssignableFrom(t) || typeof(ITransformInput).IsAssignableFrom(t))) - classBase += ", Microsoft.ML.Legacy.ILearningPipelineItem"; - } - - GenerateEnums(writer, entryPointInfo.InputType, _defaultNamespace + entryPointMetadata.Namespace); - writer.WriteLineNoTabs(); - GenerateClasses(writer, entryPointInfo.InputType, catalog, _defaultNamespace + entryPointMetadata.Namespace); - CSharpGeneratorUtils.GenerateSummary(writer, entryPointInfo.Description, entryPointInfo.XmlInclude); - - if (entryPointInfo.ObsoleteAttribute != null) - writer.WriteLine($"[Obsolete(\"{entryPointInfo.ObsoleteAttribute.Message}\")]"); - else - writer.WriteLine("[Obsolete]"); - - writer.WriteLine($"public sealed partial class {entryPointMetadata.ClassName}{classBase}"); - writer.WriteLine("{"); - writer.Indent(); - writer.WriteLineNoTabs(); - if (entryPointInfo.InputKinds != null && entryPointInfo.InputKinds.Any(t => typeof(Legacy.ILearningPipelineLoader).IsAssignableFrom(t))) - CSharpGeneratorUtils.GenerateLoaderAddInputMethod(writer, entryPointMetadata.ClassName); - - GenerateColumnAddMethods(writer, entryPointInfo.InputType, catalog, entryPointMetadata.ClassName, out Type transformType); - writer.WriteLineNoTabs(); - GenerateInputFields(writer, entryPointInfo.InputType, catalog, _defaultNamespace + entryPointMetadata.Namespace); - writer.WriteLineNoTabs(); - - GenerateOutput(writer, entryPointInfo, out HashSet outputVariableNames); - GenerateApplyFunction(writer, entryPointMetadata.ClassName, transformType, outputVariableNames, entryPointInfo.InputKinds); - writer.Outdent(); - writer.WriteLine("}"); - } - - private static void GenerateApplyFunction(IndentedTextWriter writer, string className, Type type, - HashSet outputVariableNames, Type[] inputKinds) - { - if (inputKinds == null) - return; - - bool isTransform = false; - bool isCalibrator = false; - - if (inputKinds.Any(t => typeof(ITransformInput).IsAssignableFrom(t))) - isTransform = true; - else if (!inputKinds.Any(t => typeof(ITrainerInput).IsAssignableFrom(t))) - return; - - if (inputKinds.Any(t => typeof(ICalibratorInput).IsAssignableFrom(t))) - isCalibrator = true; - - writer.WriteLine("[Obsolete]"); - if (isTransform) - writer.WriteLine("public Var GetInputData() => Data;"); - else - writer.WriteLine("public Var GetInputData() => TrainingData;"); - - writer.WriteLine(""); - writer.WriteLine("[Obsolete]"); - writer.WriteLine("public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment)"); - writer.WriteLine("{"); - - writer.Indent(); - writer.WriteLine("if 
(previousStep != null)"); - writer.WriteLine("{"); - writer.Indent(); - writer.WriteLine("if (!(previousStep is ILearningPipelineDataStep dataStep))"); - writer.WriteLine("{"); - writer.Indent(); - writer.WriteLine("throw new InvalidOperationException($\"{ nameof(" + className + ")} only supports an { nameof(ILearningPipelineDataStep)} as an input.\");"); - writer.Outdent(); - writer.WriteLine("}"); - writer.WriteLineNoTabs(); - - if (isTransform) - { - writer.WriteLine("Data = dataStep.Data;"); - } - else - writer.WriteLine("TrainingData = dataStep.Data;"); - - writer.Outdent(); - writer.WriteLine("}"); - - string pipelineStep = $"{className}PipelineStep"; - writer.WriteLine($"Output output = experiment.Add(this);"); - writer.WriteLine($"return new {pipelineStep}(output);"); - writer.Outdent(); - writer.WriteLine("}"); - - //Pipeline step. - writer.WriteLineNoTabs(); - writer.WriteLine("[Obsolete]"); - if (isTransform && !isCalibrator) - writer.WriteLine($"private class {pipelineStep} : ILearningPipelineDataStep"); - else - writer.WriteLine($"private class {pipelineStep} : ILearningPipelinePredictorStep"); - - writer.WriteLine("{"); - writer.Indent(); - writer.WriteLine("[Obsolete]"); - writer.WriteLine($"public {pipelineStep}(Output output)"); - writer.WriteLine("{"); - writer.Indent(); - - if (isTransform && !isCalibrator) - { - writer.WriteLine("Data = output.OutputData;"); - if (outputVariableNames.Contains("Model")) - writer.WriteLine("Model = output.Model;"); - } - else - writer.WriteLine("Model = output.PredictorModel;"); - - writer.Outdent(); - writer.WriteLine("}"); - writer.WriteLineNoTabs(); - - if (isTransform && !isCalibrator) - { - writer.WriteLine("[Obsolete]"); - writer.WriteLine("public Var Data { get; }"); - writer.WriteLine("[Obsolete]"); - writer.WriteLine("public Var Model { get; }"); - } - else - { - writer.WriteLine("[Obsolete]"); - writer.WriteLine("public Var Model { get; }"); - } - - writer.Outdent(); - writer.WriteLine("}"); - } - - private void GenerateInputFields(IndentedTextWriter writer, Type inputType, ComponentCatalog catalog, string rootNameSpace) - { - var defaults = Activator.CreateInstance(inputType); - foreach (var fieldInfo in inputType.GetFields()) - { - var inputAttr = - fieldInfo.GetCustomAttributes(typeof(ArgumentAttribute), false).FirstOrDefault() as ArgumentAttribute; - if (inputAttr == null || inputAttr.Visibility == ArgumentAttribute.VisibilityType.CmdLineOnly) - continue; - if (fieldInfo.FieldType == typeof(JObject)) - continue; - - CSharpGeneratorUtils.GenerateSummary(writer, inputAttr.HelpText); - if (fieldInfo.FieldType == typeof(JArray)) - { - writer.WriteLine("[Obsolete]"); - writer.WriteLine($"public Experiment {CSharpGeneratorUtils.Capitalize(inputAttr.Name ?? fieldInfo.Name)} {{ get; set; }}"); - writer.WriteLineNoTabs(); - continue; - } - - var inputTypeString = CSharpGeneratorUtils.GetInputType(catalog, fieldInfo.FieldType, _generatedClasses, rootNameSpace); - if (CSharpGeneratorUtils.IsComponent(fieldInfo.FieldType)) - writer.WriteLine("[JsonConverter(typeof(ComponentSerializer))]"); - if (CSharpGeneratorUtils.Capitalize(inputAttr.Name ?? fieldInfo.Name) != (inputAttr.Name ?? fieldInfo.Name)) - writer.WriteLine($"[JsonProperty(\"{inputAttr.Name ?? 
fieldInfo.Name}\")]"); - - // For range attributes on properties - if (fieldInfo.GetCustomAttributes(typeof(TlcModule.RangeAttribute), false).FirstOrDefault() - is TlcModule.RangeAttribute ranAttr) - writer.WriteLine(ranAttr.ToString()); - - // For sweepable ranges on properties - if (fieldInfo.GetCustomAttributes(typeof(TlcModule.SweepableParamAttribute), false).FirstOrDefault() - is TlcModule.SweepableParamAttribute sweepableParamAttr) - { - if (string.IsNullOrEmpty(sweepableParamAttr.Name)) - sweepableParamAttr.Name = fieldInfo.Name; - writer.WriteLine(sweepableParamAttr.ToString()); - } - - writer.WriteLine("[Obsolete]"); - var line = $"public {inputTypeString} {CSharpGeneratorUtils.Capitalize(inputAttr.Name ?? fieldInfo.Name)} {{ get; set; }}"; - var defaultValue = CSharpGeneratorUtils.GetValue(catalog, fieldInfo.FieldType, fieldInfo.GetValue(defaults), _generatedClasses, rootNameSpace); - if (defaultValue != null) - line += $" = {defaultValue};"; - writer.WriteLine(line); - writer.WriteLineNoTabs(); - } - } - - private void GenerateOutput(IndentedTextWriter writer, ComponentCatalog.EntryPointInfo entryPointInfo, out HashSet outputVariableNames) - { - outputVariableNames = new HashSet(); - string classBase = ""; - if (entryPointInfo.OutputKinds != null) - classBase = $" : {string.Join(", ", entryPointInfo.OutputKinds.Select(CSharpGeneratorUtils.GetCSharpTypeName))}"; - writer.WriteLine("[Obsolete]"); - writer.WriteLine($"public sealed class Output{classBase}"); - writer.WriteLine("{"); - writer.Indent(); - - var outputType = entryPointInfo.OutputType; - if (outputType.IsGenericType && outputType.GetGenericTypeDefinition() == typeof(CommonOutputs.MacroOutput<>)) - outputType = outputType.GetGenericTypeArgumentsEx()[0]; - foreach (var fieldInfo in outputType.GetFields()) - { - var outputAttr = fieldInfo.GetCustomAttributes(typeof(TlcModule.OutputAttribute), false) - .FirstOrDefault() as TlcModule.OutputAttribute; - if (outputAttr == null) - continue; - - CSharpGeneratorUtils.GenerateSummary(writer, outputAttr.Desc); - var outputTypeString = CSharpGeneratorUtils.GetOutputType(fieldInfo.FieldType); - outputVariableNames.Add(CSharpGeneratorUtils.Capitalize(outputAttr.Name ?? fieldInfo.Name)); - writer.WriteLine($"public {outputTypeString} {CSharpGeneratorUtils.Capitalize(outputAttr.Name ?? 
fieldInfo.Name)} {{ get; set; }} = new {outputTypeString}();"); - writer.WriteLineNoTabs(); - } - - writer.Outdent(); - writer.WriteLine("}"); - } - - private void GenerateComponentKind(IndentedTextWriter writer, string kind) - { - writer.WriteLine("[Obsolete]"); - writer.WriteLine($"public abstract class {kind} : ComponentKind {{}}"); - writer.WriteLineNoTabs(); - } - - private void GenerateComponent(IndentedTextWriter writer, ComponentCatalog.ComponentInfo component, ComponentCatalog catalog) - { - GenerateEnums(writer, component.ArgumentType, ""); - writer.WriteLineNoTabs(); - GenerateClasses(writer, component.ArgumentType, catalog, ""); - writer.WriteLineNoTabs(); - CSharpGeneratorUtils.GenerateSummary(writer, component.Description); - writer.WriteLine("[Obsolete]"); - writer.WriteLine($"public sealed class {CSharpGeneratorUtils.GetComponentName(component)} : {component.Kind}"); - writer.WriteLine("{"); - writer.Indent(); - GenerateInputFields(writer, component.ArgumentType, catalog, ""); - writer.WriteLine("[Obsolete]"); - writer.WriteLine($"internal override string ComponentName => \"{component.Name}\";"); - writer.Outdent(); - writer.WriteLine("}"); - writer.WriteLineNoTabs(); - } - } -} diff --git a/src/Microsoft.ML.Legacy/Runtime/Internal/Tools/CSharpGeneratorUtils.cs b/src/Microsoft.ML.Legacy/Runtime/Internal/Tools/CSharpGeneratorUtils.cs deleted file mode 100644 index e287aa6ae7..0000000000 --- a/src/Microsoft.ML.Legacy/Runtime/Internal/Tools/CSharpGeneratorUtils.cs +++ /dev/null @@ -1,490 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using System.CodeDom; -using System.CodeDom.Compiler; -using System.Collections.Generic; -using System.Linq; -using System.Reflection; -using Microsoft.CSharp; -using Microsoft.ML.CommandLine; -using Microsoft.ML.EntryPoints; -using Microsoft.ML.Internal.Utilities; -using Newtonsoft.Json.Linq; - -namespace Microsoft.ML.Internal.Tools -{ - internal static class CSharpGeneratorUtils - { - public sealed class EntryPointGenerationMetadata - { - public string Namespace { get; } - public string ClassName { get; } - public EntryPointGenerationMetadata(string classNamespace, string className) - { - Namespace = classNamespace; - ClassName = className; - } - } - - public static EntryPointGenerationMetadata GetEntryPointMetadata(ComponentCatalog.EntryPointInfo entryPointInfo) - { - var split = entryPointInfo.Name.Split('.'); - Contracts.Check(split.Length == 2); - return new EntryPointGenerationMetadata(split[0], split[1]); - } - - public static Type ExtractOptionalOrNullableType(Type type) - { - if (type.IsGenericType && (type.GetGenericTypeDefinition() == typeof(Optional<>) || type.GetGenericTypeDefinition() == typeof(Nullable<>))) - type = type.GetGenericArguments()[0]; - - return type; - } - - public static Type ExtractOptionalOrNullableType(Type type, out bool isNullable, out bool isOptional) - { - isNullable = false; - isOptional = false; - if (type.IsGenericType && type.GetGenericTypeDefinition() == typeof(Nullable<>)) - { - type = type.GetGenericArguments()[0]; - isNullable = true; - } - else if (type.IsGenericType && type.GetGenericTypeDefinition() == typeof(Optional<>)) - { - type = type.GetGenericArguments()[0]; - isOptional = true; - } - return type; - } - - public static string GetCSharpTypeName(Type type) - { - if (type.IsGenericType && 
type.GetGenericTypeDefinition() == typeof(Nullable<>)) - return GetCSharpTypeName(type.GetGenericArguments()[0]) + "?"; - - using (var p = new CSharpCodeProvider()) - return p.GetTypeOutput(new CodeTypeReference(type)); - } - - public static string GetOutputType(Type outputType) - { - Contracts.Check(Var.CheckType(outputType)); - - if (outputType.IsArray) - return $"ArrayVar<{GetCSharpTypeName(outputType.GetElementType())}>"; - if (outputType.IsGenericType && outputType.GetGenericTypeDefinition() == typeof(Dictionary<,>) - && outputType.GetGenericTypeArgumentsEx()[0] == typeof(string)) - { - return $"DictionaryVar<{GetCSharpTypeName(outputType.GetGenericTypeArgumentsEx()[1])}>"; - } - - return $"Var<{GetCSharpTypeName(outputType)}>"; - } - - public static string GetInputType(ComponentCatalog catalog, Type inputType, GeneratedClasses generatedClasses, string rootNameSpace) - { - if (inputType.IsGenericType && inputType.GetGenericTypeDefinition() == typeof(Var<>)) - return $"Var<{GetCSharpTypeName(inputType.GetGenericTypeArgumentsEx()[0])}>"; - - if (inputType.IsArray && Var.CheckType(inputType.GetElementType())) - return $"ArrayVar<{GetCSharpTypeName(inputType.GetElementType())}>"; - - if (inputType.IsGenericType && inputType.GetGenericTypeDefinition() == typeof(Dictionary<,>) - && inputType.GetGenericTypeArgumentsEx()[0] == typeof(string)) - { - return $"DictionaryVar<{GetCSharpTypeName(inputType.GetGenericTypeArgumentsEx()[1])}>"; - } - - if (Var.CheckType(inputType)) - return $"Var<{GetCSharpTypeName(inputType)}>"; - - var type = ExtractOptionalOrNullableType(inputType, out bool isNullable, out bool isOptional); - var typeEnum = TlcModule.GetDataType(type); - switch (typeEnum) - { - case TlcModule.DataKind.Float: - case TlcModule.DataKind.Int: - case TlcModule.DataKind.UInt: - case TlcModule.DataKind.Char: - case TlcModule.DataKind.String: - case TlcModule.DataKind.Bool: - case TlcModule.DataKind.DataView: - case TlcModule.DataKind.TransformModel: - case TlcModule.DataKind.PredictorModel: - case TlcModule.DataKind.FileHandle: - return GetCSharpTypeName(inputType); - case TlcModule.DataKind.Array: - return GetInputType(catalog, inputType.GetElementType(), generatedClasses, rootNameSpace) + "[]"; - case TlcModule.DataKind.Component: - string kind; - bool success = catalog.TryGetComponentKind(type, out kind); - Contracts.Assert(success); - return $"{kind}"; - case TlcModule.DataKind.Enum: - var enumName = generatedClasses.GetApiName(type, rootNameSpace); - if (isNullable) - return $"{enumName}?"; - if (isOptional) - return $"Optional<{enumName}>"; - return $"{enumName}"; - default: - if (isNullable) - return generatedClasses.GetApiName(type, rootNameSpace) + "?"; - if (isOptional) - return $"Optional<{generatedClasses.GetApiName(type, rootNameSpace)}>"; - return generatedClasses.GetApiName(type, rootNameSpace); - } - } - - public static bool IsComponent(Type inputType) - { - if (inputType.IsArray && Var.CheckType(inputType.GetElementType())) - return false; - - if (inputType.IsGenericType && inputType.GetGenericTypeDefinition() == typeof(Dictionary<,>) - && inputType.GetGenericTypeArgumentsEx()[0] == typeof(string)) - { - return false; - } - - if (Var.CheckType(inputType)) - return false; - - var type = ExtractOptionalOrNullableType(inputType); - var typeEnum = TlcModule.GetDataType(type); - return typeEnum == TlcModule.DataKind.Component; - } - - public static string Capitalize(string s) - { - if (string.IsNullOrEmpty(s)) - return s; - return char.ToUpperInvariant(s[0]) + s.Substring(1); - 
} - - private static string GetCharAsString(char value) - { - switch (value) - { - case '\t': - return "\\t"; - case '\n': - return "\\n"; - case '\r': - return "\\r"; - case '\\': - return "\\"; - case '\"': - return "\""; - case '\'': - return "\\'"; - case '\0': - return "\\0"; - case '\a': - return "\\a"; - case '\b': - return "\\b"; - case '\f': - return "\\f"; - case '\v': - return "\\v"; - default: - return value.ToString(); - } - } - - public static string GetValue(ComponentCatalog catalog, Type fieldType, object fieldValue, - GeneratedClasses generatedClasses, string rootNameSpace) - { - if (fieldType.IsGenericType && fieldType.GetGenericTypeDefinition() == typeof(Var<>)) - return $"new Var<{GetCSharpTypeName(fieldType.GetGenericTypeArgumentsEx()[0])}>()"; - - if (fieldType.IsArray && Var.CheckType(fieldType.GetElementType())) - return $"new ArrayVar<{GetCSharpTypeName(fieldType.GetElementType())}>()"; - - if (fieldType.IsGenericType && fieldType.GetGenericTypeDefinition() == typeof(Dictionary<,>) - && fieldType.GetGenericTypeArgumentsEx()[0] == typeof(string)) - { - return $"new DictionaryVar<{GetCSharpTypeName(fieldType.GetGenericTypeArgumentsEx()[1])}>()"; - } - - if (Var.CheckType(fieldType)) - return $"new Var<{GetCSharpTypeName(fieldType)}>()"; - - if (fieldValue == null) - return null; - - if (!fieldType.IsInterface) - { - try - { - var defaultFieldValue = Activator.CreateInstance(fieldType); - if (defaultFieldValue == fieldValue) - return null; - } - catch (MissingMethodException) - { - // No parameterless constructor, ignore. - } - } - - var typeEnum = TlcModule.GetDataType(fieldType); - fieldType = ExtractOptionalOrNullableType(fieldType, out bool isNullable, out bool isOptional); - switch (typeEnum) - { - case TlcModule.DataKind.Array: - var arr = fieldValue as Array; - if (arr != null && arr.GetLength(0) > 0) - return $"{{ {string.Join(", ", arr.Cast().Select(item => GetValue(catalog, fieldType.GetElementType(), item, generatedClasses, rootNameSpace)))} }}"; - return null; - case TlcModule.DataKind.String: - var strval = fieldValue as string; - if (strval != null) - return Quote(strval); - return null; - case TlcModule.DataKind.Float: - if (fieldValue is double d) - { - if (double.IsPositiveInfinity(d)) - return "double.PositiveInfinity"; - if (double.IsNegativeInfinity(d)) - return "double.NegativeInfinity"; - if (d != 0) - return d.ToString("R") + "d"; - } - else if (fieldValue is float f) - { - if (float.IsPositiveInfinity(f)) - return "float.PositiveInfinity"; - if (float.IsNegativeInfinity(f)) - return "float.NegativeInfinity"; - if (f != 0) - return f.ToString("R") + "f"; - } - return null; - case TlcModule.DataKind.Int: - if (fieldValue is int i) - { - if (i != 0) - return i.ToString(); - } - else if (fieldValue is long l) - { - if (l != 0) - return l.ToString(); - } - return null; - case TlcModule.DataKind.Bool: - return (bool)fieldValue ? "true" : "false"; - case TlcModule.DataKind.Enum: - string enumAsString = fieldValue.ToString(); - if (fieldType.GetField(enumAsString).GetCustomAttribute() != null) - { - // The default value for the enum has the hiding attribute on it. We will search for - // alternate names. Regrettably I see no way beyond a manual scan. 
- - string unhiddenName = Enum.GetNames(fieldType).Zip(Enum.GetValues(fieldType).Cast(), (name, val) => (name, val)) - .Where(pair => pair.val.Equals(fieldValue)) - .Where(pair => fieldType.GetField(pair.name).GetCustomAttribute() == null) - .Select(pair => pair.name).FirstOrDefault(); - enumAsString = unhiddenName ?? throw Contracts.Except($"Could not find unhidden alternative for '{fieldValue}' in type '{fieldType}'"); - } - if (generatedClasses.IsGenerated(fieldType.FullName)) - return generatedClasses.GetApiName(fieldType, rootNameSpace) + "." + enumAsString; - else - return generatedClasses.GetApiName(fieldType, "") + "." + enumAsString; - case TlcModule.DataKind.Char: - return $"'{GetCharAsString((char)fieldValue)}'"; - case TlcModule.DataKind.Component: - var type = fieldValue.GetType(); - ComponentCatalog.ComponentInfo componentInfo; - if (!catalog.TryFindComponent(fieldType, type, out componentInfo)) - return null; - object defaultComponent = null; - try - { - defaultComponent = Activator.CreateInstance(componentInfo.ArgumentType); - } - catch (MissingMethodException) - { - // No parameterless constructor, ignore. - } - var propertyBag = new List(); - if (defaultComponent != null) - { - foreach (var fieldInfo in componentInfo.ArgumentType.GetFields()) - { - var inputAttr = fieldInfo.GetCustomAttributes(typeof(ArgumentAttribute), false).FirstOrDefault() as ArgumentAttribute; - if (inputAttr == null || inputAttr.Visibility == ArgumentAttribute.VisibilityType.CmdLineOnly) - continue; - if (fieldInfo.FieldType == typeof(JArray) || fieldInfo.FieldType == typeof(JObject)) - continue; - - var propertyValue = GetValue(catalog, fieldInfo.FieldType, fieldInfo.GetValue(fieldValue), generatedClasses, rootNameSpace); - var defaultPropertyValue = GetValue(catalog, fieldInfo.FieldType, fieldInfo.GetValue(defaultComponent), generatedClasses, rootNameSpace); - if (propertyValue != defaultPropertyValue) - propertyBag.Add($"{Capitalize(inputAttr.Name ?? fieldInfo.Name)} = {propertyValue}"); - } - } - var properties = propertyBag.Count > 0 ? 
$" {{ {string.Join(", ", propertyBag)} }}" : ""; - return $"new {GetComponentName(componentInfo)}(){properties}"; - case TlcModule.DataKind.Unknown: - return $"new {generatedClasses.GetApiName(fieldType, rootNameSpace)}()"; - default: - return fieldValue.ToString(); - } - } - - private static string Quote(string src) - { - var dst = src.Replace("\\", @"\\").Replace("\"", "\\\"").Replace("\n", @"\n").Replace("\r", @"\r"); - return "\"" + dst + "\""; - } - - public static string GetComponentName(ComponentCatalog.ComponentInfo component) - { - return $"{Capitalize(component.Name)}{component.Kind}"; - } - - public static void GenerateSummary(IndentedTextWriter writer, string summary, string[] xmlInclude = null) - { - // if the class has an XML it should contain the summary and everything else - if (xmlInclude != null) - { - foreach (var line in xmlInclude) - writer.WriteLine($"/// {line}"); - - return; - } - - if (string.IsNullOrEmpty(summary)) - return; - writer.WriteLine("/// "); - foreach (var line in summary.Split(new[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries)) - writer.WriteLine($"/// {line}"); - writer.WriteLine("/// "); - } - - public static void GenerateHeader(IndentedTextWriter writer) - { - writer.WriteLine("//------------------------------------------------------------------------------"); - writer.WriteLine("// "); - writer.WriteLine("// This code was generated by a tool."); - writer.WriteLine("//"); - writer.WriteLine("// Changes to this file may cause incorrect behavior and will be lost if"); - writer.WriteLine("// the code is regenerated."); - writer.WriteLine("// "); - writer.WriteLine("//------------------------------------------------------------------------------"); - writer.WriteLine("#pragma warning disable"); - writer.WriteLine("using System.Collections.Generic;"); - writer.WriteLine("using Microsoft.ML;"); - writer.WriteLine("using Microsoft.ML.Data;"); - writer.WriteLine("using Microsoft.ML.EntryPoints;"); - writer.WriteLine("using Newtonsoft.Json;"); - writer.WriteLine("using System;"); - writer.WriteLine("using System.Linq;"); - writer.WriteLine("using Microsoft.ML.CommandLine;"); - writer.WriteLineNoTabs(); - writer.WriteLine("namespace Microsoft.ML"); - writer.WriteLine("{"); - writer.Indent(); - writer.WriteLine("public sealed partial class Experiment"); - writer.WriteLine("{"); - writer.Indent(); - } - - public static void GenerateFooter(IndentedTextWriter writer) - { - writer.Outdent(); - writer.WriteLine("}"); - } - - public static void GenerateMethod(IndentedTextWriter writer, string className, string defaultNamespace) - { - var inputOuputClassName = defaultNamespace + className; - writer.WriteLine("[Obsolete]"); - writer.WriteLine($"public {inputOuputClassName}.Output Add({inputOuputClassName} input)"); - writer.WriteLine("{"); - writer.Indent(); - writer.WriteLine($"var output = new {inputOuputClassName}.Output();"); - writer.WriteLine("Add(input, output);"); - writer.WriteLine("return output;"); - writer.Outdent(); - writer.WriteLine("}"); - writer.WriteLineNoTabs(); - writer.WriteLine("[Obsolete]"); - writer.WriteLine($"public void Add({inputOuputClassName} input, {inputOuputClassName}.Output output)"); - writer.WriteLine("{"); - writer.Indent(); - writer.WriteLine($"_jsonNodes.Add(Serialize(\"{className}\", input, output));"); - writer.Outdent(); - writer.WriteLine("}"); - writer.WriteLineNoTabs(); - } - - public static void GenerateLoaderAddInputMethod(IndentedTextWriter writer, string className) - { - //Constructor. 
- writer.WriteLine("[Obsolete]"); - writer.WriteLine("[JsonIgnore]"); - writer.WriteLine("private string _inputFilePath = null;"); - writer.WriteLine($"public {className}(string filePath)"); - writer.WriteLine("{"); - writer.Indent(); - writer.WriteLine("_inputFilePath = filePath;"); - writer.Outdent(); - writer.WriteLine("}"); - writer.WriteLine(""); - - //SetInput. - writer.WriteLine("[Obsolete]"); - writer.WriteLine($"public void SetInput(IHostEnvironment env, Experiment experiment)"); - writer.WriteLine("{"); - writer.Indent(); - writer.WriteLine("IFileHandle inputFile = new SimpleFileHandle(env, _inputFilePath, false, false);"); - writer.WriteLine("experiment.SetInput(InputFile, inputFile);"); - writer.Outdent(); - writer.WriteLine("}"); - writer.WriteLine(""); - - //GetInputData - writer.WriteLine("[Obsolete]"); - writer.WriteLine("public Var GetInputData() => null;"); - writer.WriteLine(""); - - //Apply. - writer.WriteLine("[Obsolete]"); - writer.WriteLine($"public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment)"); - writer.WriteLine("{"); - writer.Indent(); - writer.WriteLine("Contracts.Assert(previousStep == null);"); - writer.WriteLine(""); - writer.WriteLine($"return new {className}PipelineStep(experiment.Add(this));"); - writer.Outdent(); - writer.WriteLine("}"); - writer.WriteLine(""); - - //Pipelinestep class. - writer.WriteLine("[Obsolete]"); - writer.WriteLine($"private class {className}PipelineStep : ILearningPipelineDataStep"); - writer.WriteLine("{"); - writer.Indent(); - writer.WriteLine($"public {className}PipelineStep (Output output)"); - writer.WriteLine("{"); - writer.Indent(); - writer.WriteLine("Data = output.Data;"); - writer.WriteLine("Model = null;"); - writer.Outdent(); - writer.WriteLine("}"); - writer.WriteLineNoTabs(); - writer.WriteLine("public Var Data { get; }"); - writer.WriteLine("public Var Model { get; }"); - writer.Outdent(); - writer.WriteLine("}"); - } - } -} diff --git a/src/Microsoft.ML.Legacy/Runtime/Internal/Tools/GeneratedClasses.cs b/src/Microsoft.ML.Legacy/Runtime/Internal/Tools/GeneratedClasses.cs deleted file mode 100644 index d8bccc1b68..0000000000 --- a/src/Microsoft.ML.Legacy/Runtime/Internal/Tools/GeneratedClasses.cs +++ /dev/null @@ -1,102 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using System.Collections.Generic; -using System.Linq; - -namespace Microsoft.ML.Internal.Tools -{ - internal sealed class GeneratedClasses - { - private sealed class ApiClass - { - public string OriginalName { get; set; } - public string NewName { get; set; } - public bool Generated { get; set; } - } - - private readonly Dictionary _typesSymbolTable; - - public GeneratedClasses() - { - _typesSymbolTable = new Dictionary(); - } - - public string GetApiName(Type type, string rootNamespace) - { - string apiName = ""; - if (!_typesSymbolTable.TryGetValue(type.FullName, out ApiClass apiClass)) - apiName = GenerateIntenalName(type, rootNamespace); - else - apiName = apiClass.NewName; - - if (!string.IsNullOrEmpty(rootNamespace)&& apiName.StartsWith(rootNamespace)) - return apiName.Substring(rootNamespace.Length + 1); - else return apiName; - } - - private string GenerateIntenalName(Type type, string currentNamespace) - { - var fullTypeName = type.FullName; - string name = currentNamespace != "" ? currentNamespace + '.' 
: ""; - - int bracketIndex = fullTypeName.IndexOf('['); - Type[] genericTypes = null; - if (type.IsGenericType) - genericTypes = type.GetGenericArguments(); - if (bracketIndex > 0) - { - Contracts.AssertValue(genericTypes); - fullTypeName = fullTypeName.Substring(0, bracketIndex); - } - - // When the type is nested, the names of the outer types are concatenated with a '+'. - var nestedNames = fullTypeName.Split('+'); - var baseName = nestedNames[0]; - - // We currently only handle generic types in the outer most class, support for generic inner classes - // can be added if needed. - int backTickIndex = baseName.LastIndexOf('`'); - int dotIndex = baseName.LastIndexOf('.'); - Contracts.Assert(dotIndex >= 0); - if (backTickIndex < 0) - name += baseName.Substring(dotIndex + 1); - else - { - name += baseName.Substring(dotIndex + 1, backTickIndex - dotIndex - 1); - Contracts.AssertValue(genericTypes); - if (genericTypes != null) - { - foreach (var genType in genericTypes) - { - var splitNames = genType.FullName.Split('+'); - if (splitNames[0].LastIndexOf('.') >= 0) - splitNames[0] = splitNames[0].Substring(splitNames[0].LastIndexOf('.') + 1); - name += string.Join("", splitNames); - } - } - } - - for (int i = 1; i < nestedNames.Length; i++) - name += nestedNames[i]; - - Contracts.Assert(_typesSymbolTable.Values.All(apiclass => string.Compare(apiclass.NewName, name) != 0)); - _typesSymbolTable[type.FullName] = new ApiClass { OriginalName = type.FullName, Generated = false, NewName = name }; - return name; - } - - internal bool IsGenerated(string fullName) - { - if (!_typesSymbolTable.ContainsKey(fullName)) - return false; - return _typesSymbolTable[fullName].Generated; - } - - internal void MarkAsGenerated(string fullName) - { - _typesSymbolTable[fullName].Generated = true; - } - } -} diff --git a/src/Microsoft.ML.Legacy/Trainers/LightGBM.cs b/src/Microsoft.ML.Legacy/Trainers/LightGBM.cs deleted file mode 100644 index 063ee67a7f..0000000000 --- a/src/Microsoft.ML.Legacy/Trainers/LightGBM.cs +++ /dev/null @@ -1,58 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -namespace Microsoft.ML.Legacy.Trainers -{ - /// - /// This API requires Microsoft.ML.LightGBM nuget. - /// - /// - /// - /// pipeline.Add(new LightGbmBinaryClassifier() { NumLeaves = 5, NumBoostRound = 5, MinDataPerLeaf = 2 }) - /// - /// - public sealed partial class LightGbmBinaryClassifier - { - - } - - /// - /// This API requires Microsoft.ML.LightGBM nuget. - /// - /// - /// - /// pipeline.Add(new LightGbmClassifier() { NumLeaves = 5, NumBoostRound = 5, MinDataPerLeaf = 2 }) - /// - /// - public sealed partial class LightGbmClassifier - { - - } - - /// - /// This API requires Microsoft.ML.LightGBM nuget. - /// - /// - /// - /// pipeline.Add(new LightGbmRanker() { NumLeaves = 5, NumBoostRound = 5, MinDataPerLeaf = 2 }) - /// - /// - public sealed partial class LightGbmRanker - { - - } - - /// - /// This API requires Microsoft.ML.LightGBM nuget. 
- /// - /// - /// - /// pipeline.Add(new LightGbmRegressor() { NumLeaves = 5, NumBoostRound = 5, MinDataPerLeaf = 2 }) - /// - /// - public sealed partial class LightGbmRegressor - { - - } -} diff --git a/src/Microsoft.ML.LightGBM.StaticPipe/Microsoft.ML.LightGBM.StaticPipe.csproj b/src/Microsoft.ML.LightGBM.StaticPipe/Microsoft.ML.LightGBM.StaticPipe.csproj index a93cd754bf..38e5dafa27 100644 --- a/src/Microsoft.ML.LightGBM.StaticPipe/Microsoft.ML.LightGBM.StaticPipe.csproj +++ b/src/Microsoft.ML.LightGBM.StaticPipe/Microsoft.ML.LightGBM.StaticPipe.csproj @@ -7,6 +7,7 @@ + diff --git a/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs index 80776030d1..2c07bcfd92 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs @@ -113,7 +113,7 @@ private protected override void CheckDataValid(IChannel ch, RoleMappedData data) Host.AssertValue(ch); base.CheckDataValid(ch, data); var labelType = data.Schema.Label.Value.Type; - if (!(labelType.IsBool || labelType.IsKey || labelType == NumberType.R4)) + if (!(labelType is BoolType || labelType.IsKey || labelType == NumberType.R4)) { throw ch.ExceptParam(nameof(data), $"Label column '{data.Schema.Label.Value.Name}' is of type '{labelType}', but must be key, boolean or R4."); diff --git a/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs index 159b95b243..30792d1320 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs @@ -131,7 +131,7 @@ private protected override void CheckDataValid(IChannel ch, RoleMappedData data) Host.AssertValue(ch); base.CheckDataValid(ch, data); var labelType = data.Schema.Label.Value.Type; - if (!(labelType.IsBool || labelType.IsKey || labelType == NumberType.R4)) + if (!(labelType is BoolType || labelType.IsKey || labelType == NumberType.R4)) { throw ch.ExceptParam(nameof(data), $"Label column '{data.Schema.Label.Value.Name}' is of type '{labelType}', but must be key, boolean or R4."); diff --git a/src/Microsoft.ML.LightGBM/Properties/AssemblyInfo.cs b/src/Microsoft.ML.LightGBM/Properties/AssemblyInfo.cs index c9db8428a0..b815e0e3a0 100644 --- a/src/Microsoft.ML.LightGBM/Properties/AssemblyInfo.cs +++ b/src/Microsoft.ML.LightGBM/Properties/AssemblyInfo.cs @@ -8,4 +8,6 @@ [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Tests" + PublicKey.TestValue)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.LightGBM.StaticPipe" + PublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "RunTests" + InternalPublicKey.Value)] + [assembly: WantsToBeBestFriends] diff --git a/src/Microsoft.ML.Maml/Properties/AssemblyInfo.cs b/src/Microsoft.ML.Maml/Properties/AssemblyInfo.cs index 2226c6d7fc..470342f47b 100644 --- a/src/Microsoft.ML.Maml/Properties/AssemblyInfo.cs +++ b/src/Microsoft.ML.Maml/Properties/AssemblyInfo.cs @@ -7,6 +7,7 @@ [assembly: InternalsVisibleTo("Microsoft.ML.TestFramework" + PublicKey.TestValue)] [assembly: InternalsVisibleTo("Microsoft.ML.Benchmarks" + PublicKey.TestValue)] - -[assembly: InternalsVisibleTo("Microsoft.ML.Legacy" + PublicKey.Value)] [assembly: InternalsVisibleTo("Microsoft.ML.ResultProcessor" + PublicKey.Value)] + +[assembly: InternalsVisibleTo("RunTests" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo("TLC" + InternalPublicKey.Value)] diff --git a/src/Microsoft.ML.Onnx/OnnxExportExtensions.cs 
b/src/Microsoft.ML.Onnx/OnnxExportExtensions.cs new file mode 100644 index 0000000000..0922fe8b0e --- /dev/null +++ b/src/Microsoft.ML.Onnx/OnnxExportExtensions.cs @@ -0,0 +1,35 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Collections.Generic; +using Microsoft.ML.Core.Data; +using Microsoft.ML.Data; +using Microsoft.ML.Model.Onnx; +using Microsoft.ML.UniversalModelFormat.Onnx; + +namespace Microsoft.ML +{ + public static class OnnxExportExtensions + { + /// + /// Convert the specified to ONNX format. Note that ONNX uses Google's Protobuf so the returned value is a Protobuf object. + /// + /// The class that attached to. + /// The that will be converted into ONNX format. + /// The input of the specified transform. + /// An ONNX model equivalent to the converted ML.NET model. + public static ModelProto ConvertToOnnx(this ModelOperationsCatalog catalog, ITransformer transform, IDataView inputData) + { + var env = catalog.Environment; + var ctx = new OnnxContextImpl(env, "model", "ML.NET", "0", 0, "machinelearning.dotnet", OnnxVersion.Stable); + var outputData = transform.Transform(inputData); + LinkedList transforms = null; + using (var ch = env.Start("ONNX conversion")) + { + SaveOnnxCommand.GetPipe(ctx, ch, outputData, out IDataView root, out IDataView sink, out transforms); + return SaveOnnxCommand.ConvertTransformListToOnnxModel(ctx, ch, root, sink, transforms, null, null); + } + } + } +} diff --git a/src/Microsoft.ML.Onnx/SaveOnnxCommand.cs b/src/Microsoft.ML.Onnx/SaveOnnxCommand.cs index 250969850f..aa4e114b58 100644 --- a/src/Microsoft.ML.Onnx/SaveOnnxCommand.cs +++ b/src/Microsoft.ML.Onnx/SaveOnnxCommand.cs @@ -12,6 +12,7 @@ using Microsoft.ML.EntryPoints; using Microsoft.ML.Internal.Utilities; using Microsoft.ML.Model.Onnx; +using Microsoft.ML.UniversalModelFormat.Onnx; using Newtonsoft.Json; [assembly: LoadableClass(SaveOnnxCommand.Summary, typeof(SaveOnnxCommand), typeof(SaveOnnxCommand.Arguments), typeof(SignatureCommand), @@ -113,9 +114,10 @@ public override void Run() } } - private void GetPipe(OnnxContextImpl ctx, IChannel ch, IDataView end, out IDataView source, out IDataView trueEnd, out LinkedList transforms) + internal static void GetPipe(OnnxContextImpl ctx, IChannel ch, IDataView end, out IDataView source, out IDataView trueEnd, out LinkedList transforms) { - Host.AssertValue(end); + ch.AssertValue(end); + source = trueEnd = (end as CompositeDataLoader)?.View ?? end; IDataTransform transform = source as IDataTransform; transforms = new LinkedList(); @@ -134,7 +136,53 @@ private void GetPipe(OnnxContextImpl ctx, IChannel ch, IDataView end, out IDataV transform = (source = transform.Source) as IDataTransform; } - Host.AssertValue(source); + ch.AssertValue(source); + } + + internal static ModelProto ConvertTransformListToOnnxModel(OnnxContextImpl ctx, IChannel ch, IDataView inputData, IDataView outputData, + LinkedList transforms, HashSet inputColumnNamesToDrop=null, HashSet outputColumnNamesToDrop=null) + { + inputColumnNamesToDrop = inputColumnNamesToDrop ?? new HashSet(); + outputColumnNamesToDrop = outputColumnNamesToDrop ?? new HashSet(); + HashSet inputColumns = new HashSet(); + // Create graph inputs. 
+ for (int i = 0; i < inputData.Schema.Count; i++) + { + string colName = inputData.Schema[i].Name; + if(inputColumnNamesToDrop.Contains(colName)) + continue; + + ctx.AddInputVariable(inputData.Schema[i].Type, colName); + inputColumns.Add(colName); + } + + // Create graph nodes, outputs and intermediate values. + foreach (var trans in transforms) + { + ch.Assert(trans.CanSaveOnnx(ctx)); + trans.SaveAsOnnx(ctx); + } + + // Add graph outputs. + for (int i = 0; i < outputData.Schema.Count; ++i) + { + if (outputData.Schema[i].IsHidden) + continue; + + var idataviewColumnName = outputData.Schema[i].Name; + + // Since the last IDataView also contains columns of the initial IDataView, last IDataView's columns found in + // _inputToDrop should be removed too. + if (inputColumnNamesToDrop.Contains(idataviewColumnName) || outputColumnNamesToDrop.Contains(idataviewColumnName)) + continue; + + var variableName = ctx.TryGetVariableName(idataviewColumnName); + var trueVariableName = ctx.AddIntermediateVariable(null, idataviewColumnName, true); + ctx.CreateNode("Identity", variableName, trueVariableName, ctx.GetNodeName("Identity"), ""); + ctx.AddOutputVariable(outputData.Schema[i].Type, trueVariableName); + } + + return ctx.MakeModel(); } private void Run(IChannel ch) @@ -210,45 +258,8 @@ private void Run(IChannel ch) nameof(Arguments.LoadPredictor), "We were explicitly told to load the predictor but one was not present."); } - HashSet inputColumns = new HashSet(); - //Create graph inputs. - for (int i = 0; i < source.Schema.Count; i++) - { - string colName = source.Schema[i].Name; - if(_inputsToDrop.Contains(colName)) - continue; - - ctx.AddInputVariable(source.Schema[i].Type, colName); - inputColumns.Add(colName); - } - - //Create graph nodes, outputs and intermediate values. - foreach (var trans in transforms) - { - Host.Assert(trans.CanSaveOnnx(ctx)); - trans.SaveAsOnnx(ctx); - } - - //Add graph outputs. - for (int i = 0; i < end.Schema.Count; ++i) - { - if (end.Schema[i].IsHidden) - continue; - - var idataviewColumnName = end.Schema[i].Name; - - // Since the last IDataView also contains columns of the initial IDataView, last IDataView's columns found in - // _inputToDrop should be removed too. 
- if (_inputsToDrop.Contains(idataviewColumnName) || _outputsToDrop.Contains(idataviewColumnName)) - continue; - - var variableName = ctx.TryGetVariableName(idataviewColumnName); - var trueVariableName = ctx.AddIntermediateVariable(null, idataviewColumnName, true); - ctx.CreateNode("Identity", variableName, trueVariableName, ctx.GetNodeName("Identity"), ""); - ctx.AddOutputVariable(end.Schema[i].Type, trueVariableName); - } + var model = ConvertTransformListToOnnxModel(ctx, ch, source, end, transforms, _inputsToDrop, _outputsToDrop); - var model = ctx.MakeModel(); using (var file = Host.CreateOutputFile(_outputModelPath)) using (var stream = file.CreateWriteStream()) model.WriteTo(stream); diff --git a/src/Microsoft.ML.OnnxTransform.StaticPipe/Microsoft.ML.OnnxTransform.StaticPipe.csproj b/src/Microsoft.ML.OnnxTransform.StaticPipe/Microsoft.ML.OnnxTransform.StaticPipe.csproj index dbb8e6af36..ab1564b7d3 100644 --- a/src/Microsoft.ML.OnnxTransform.StaticPipe/Microsoft.ML.OnnxTransform.StaticPipe.csproj +++ b/src/Microsoft.ML.OnnxTransform.StaticPipe/Microsoft.ML.OnnxTransform.StaticPipe.csproj @@ -7,6 +7,7 @@ + diff --git a/src/Microsoft.ML.OnnxTransform/Microsoft.ML.OnnxTransform.csproj b/src/Microsoft.ML.OnnxTransform/Microsoft.ML.OnnxTransform.csproj index ce2ac23746..81f014d392 100644 --- a/src/Microsoft.ML.OnnxTransform/Microsoft.ML.OnnxTransform.csproj +++ b/src/Microsoft.ML.OnnxTransform/Microsoft.ML.OnnxTransform.csproj @@ -9,7 +9,7 @@ - + diff --git a/src/Microsoft.ML.OnnxTransform/OnnxTransform.cs b/src/Microsoft.ML.OnnxTransform/OnnxTransform.cs index 91ae1d81d7..05a792260c 100644 --- a/src/Microsoft.ML.OnnxTransform/OnnxTransform.cs +++ b/src/Microsoft.ML.OnnxTransform/OnnxTransform.cs @@ -42,10 +42,17 @@ namespace Microsoft.ML.Transforms /// /// /// - ///

-    /// Supports inferencing of models in 1.2 and 1.3 format, using the
-    /// Microsoft.ML.OnnxRuntime library
+    /// Supports inferencing of models in ONNX 1.2 and 1.3 format (opset 7, 8 and 9), using the
+    /// Microsoft.ML.OnnxRuntime.Gpu library.
-    /// The inputs and outputs of the onnx models must of of Tensors. Sequence and Maps are not yet supported.
+    /// Models are scored on CPU by default. If GPU execution is needed (optional), install
+    /// CUDA 10.0 Toolkit and cuDNN, and set the parameter 'gpuDeviceId' to a valid non-negative integer.
+    /// Typical device ID values are 0 or 1.
+    /// The inputs and outputs of the ONNX models must be Tensor type. Sequence and Maps are not yet supported.
+    /// OnnxRuntime currently works on Windows 64-bit platforms only. Linux and OSX to be supported soon.
     /// Visit https://github.com/onnx/models to see a list of readily available models to get started with.
     /// Refer to http://onnx.ai for more information about ONNX.
     ///
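To make the GPU scoring options described above concrete, here is a minimal usage sketch rather than code from this change. It assumes `env` is an `IHostEnvironment`, `data` is an `IDataView` whose column names and types match the ONNX model's inputs, the model path and column names are placeholders, and the Microsoft.ML.OnnxRuntime.Gpu package plus the CUDA 10.0 Toolkit and cuDNN are installed.

```csharp
// Minimal sketch: score an ONNX model on GPU device 0, falling back to CPU on GPU errors.
// `env` (IHostEnvironment) and `data` (IDataView) are assumed to exist; names below are illustrative.
var estimator = new OnnxScoringEstimator(
    env,
    "model.onnx",                 // hypothetical model file path
    new[] { "data_0" },           // must match the model's input names
    new[] { "softmaxout_1" },     // must match the model's output names
    gpuDeviceId: 0,               // null (the default) keeps scoring on CPU
    fallbackToCpu: true);         // on a GPU error, continue on CPU instead of throwing

var model = estimator.Fit(data);
var scored = model.Transform(data);
```

With `fallbackToCpu: false`, a GPU initialization or execution failure surfaces as an exception rather than silently continuing on CPU.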
@@ -61,6 +68,12 @@ public sealed class Arguments : TransformInputBase [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "Name of the output column.", SortOrder = 2)] public string[] OutputColumns; + + [Argument(ArgumentType.AtMostOnce | ArgumentType.Required, HelpText = "GPU device id to run on (e.g. 0,1,..). Null for CPU. Requires CUDA 10.0.", SortOrder = 3)] + public int? GpuDeviceId = null; + + [Argument(ArgumentType.AtMostOnce | ArgumentType.Required, HelpText = "If true, resumes execution on CPU upon GPU error. If false, will raise the GPU execption.", SortOrder = 4)] + public bool FallbackToCpu = false; } private readonly Arguments _args; @@ -88,15 +101,27 @@ private static VersionInfo GetVersionInfo() loaderAssemblyName: typeof(OnnxTransform).Assembly.FullName); } - public static IDataTransform Create(IHostEnvironment env, IDataView input, string modelFile) + public static IDataTransform Create(IHostEnvironment env, IDataView input, string modelFile, int? gpuDeviceId = null, bool fallbackToCpu = false) { - var args = new Arguments { ModelFile = modelFile, InputColumns = new string[] { }, OutputColumns = new string[] { } }; + var args = new Arguments { + ModelFile = modelFile, + InputColumns = new string[] { }, + OutputColumns = new string[] { }, + GpuDeviceId = gpuDeviceId, + FallbackToCpu = fallbackToCpu }; + return Create(env, args, input); } - public static IDataTransform Create(IHostEnvironment env, IDataView input, string modelFile, string[] inputColumns, string[] outputColumns) + public static IDataTransform Create(IHostEnvironment env, IDataView input, string modelFile, string[] inputColumns, string[] outputColumns, int? gpuDeviceId = null, bool fallbackToCpu = false) { - var args = new Arguments { ModelFile = modelFile, InputColumns = inputColumns, OutputColumns = outputColumns }; + var args = new Arguments { + ModelFile = modelFile, + InputColumns = inputColumns, + OutputColumns = outputColumns, + GpuDeviceId = gpuDeviceId, + FallbackToCpu = fallbackToCpu }; + return Create(env, args, input); } @@ -156,14 +181,21 @@ private OnnxTransform(IHostEnvironment env, Arguments args, byte[] modelBytes = foreach (var col in args.OutputColumns) Host.CheckNonWhiteSpace(col, nameof(args.OutputColumns)); - if (modelBytes == null) + try + { + if (modelBytes == null) + { + Host.CheckNonWhiteSpace(args.ModelFile, nameof(args.ModelFile)); + Host.CheckUserArg(File.Exists(args.ModelFile), nameof(args.ModelFile)); + Model = new OnnxModel(args.ModelFile, args.GpuDeviceId, args.FallbackToCpu); + } + else + Model = OnnxModel.CreateFromBytes(modelBytes, args.GpuDeviceId, args.FallbackToCpu); + } + catch (OnnxRuntimeException e) { - Host.CheckNonWhiteSpace(args.ModelFile, nameof(args.ModelFile)); - Host.CheckUserArg(File.Exists(args.ModelFile), nameof(args.ModelFile)); - Model = new OnnxModel(args.ModelFile); + throw Host.Except(e, $"Error initializing model :{e.ToString()}"); } - else - Model = OnnxModel.CreateFromBytes(modelBytes); var modelInfo = Model.ModelInfo; Inputs = (args.InputColumns.Count() == 0 ) ? Model.InputNames.ToArray() : args.InputColumns; @@ -184,18 +216,68 @@ private OnnxTransform(IHostEnvironment env, Arguments args, byte[] modelBytes = _args = args; } - public OnnxTransform(IHostEnvironment env, string modelFile) - : this(env, new Arguments() { ModelFile = modelFile, InputColumns = new string[] { }, OutputColumns = new string[] { } }) + /// + /// Transform for scoring ONNX models. Input data column names/types must exactly match + /// all model input names. 
All possible output columns are generated, with names/types + /// specified by model. + /// + /// The environment to use. + /// Model file path. + /// Optional GPU device ID to run execution on. Null for CPU. + /// If GPU error, raise exception or fallback to CPU. + public OnnxTransform(IHostEnvironment env, string modelFile, int? gpuDeviceId = null, bool fallbackToCpu = false) + : this(env, new Arguments() + { + ModelFile = modelFile, + InputColumns = new string[] {}, + OutputColumns = new string[] {}, + GpuDeviceId = gpuDeviceId, + FallbackToCpu = fallbackToCpu + }) { } - public OnnxTransform(IHostEnvironment env, string modelFile, string inputColumn, string outputColumn) - : this(env, new Arguments() { ModelFile = modelFile, InputColumns = new[] { inputColumn }, OutputColumns = new[] { outputColumn } }) + /// + /// Transform for scoring ONNX models. Input data column name/type must exactly match + /// the model specification. Only 1 output column is generated. + /// + /// The environment to use. + /// Model file path. + /// The name of the input data column. Must match model input name. + /// The output columns to generate. Names must match model specifications. Data types are inferred from model. + /// Optional GPU device ID to run execution on. Null for CPU. + /// If GPU error, raise exception or fallback to CPU. + public OnnxTransform(IHostEnvironment env, string modelFile, string inputColumn, string outputColumn, int? gpuDeviceId = null, bool fallbackToCpu = false) + : this(env, new Arguments() + { + ModelFile = modelFile, + InputColumns = new[] { inputColumn }, + OutputColumns = new[] { outputColumn }, + GpuDeviceId = gpuDeviceId, + FallbackToCpu = fallbackToCpu + }) { } - public OnnxTransform(IHostEnvironment env, string modelFile, string[] inputColumns, string[] outputColumns) - : this(env, new Arguments() { ModelFile = modelFile, InputColumns = inputColumns, OutputColumns = outputColumns }) + /// + /// Transform for scoring ONNX models. Input data column names/types must exactly match + /// all model input names. Only the output columns specified will be generated. + /// + /// The environment to use. + /// Model file path. + /// The name of the input data columns. Must match model's input names. + /// The output columns to generate. Names must match model specifications. Data types are inferred from model. + /// Optional GPU device ID to run execution on. Null for CPU. + /// If GPU error, raise exception or fallback to CPU. + public OnnxTransform(IHostEnvironment env, string modelFile, string[] inputColumns, string[] outputColumns, int? gpuDeviceId = null, bool fallbackToCpu = false) + : this(env, new Arguments() + { + ModelFile = modelFile, + InputColumns = inputColumns, + OutputColumns = outputColumns, + GpuDeviceId = gpuDeviceId, + FallbackToCpu = fallbackToCpu + }) { } @@ -450,13 +532,32 @@ public NamedOnnxValue GetNamedOnnxValue() /// public sealed class OnnxScoringEstimator : TrivialEstimator { - public OnnxScoringEstimator(IHostEnvironment env, string modelFile) - : this(env, new OnnxTransform(env, modelFile, new string[] { }, new string[] { })) + /// + /// Transform for scoring ONNX models. Input data column names/types must exactly match + /// all model input names. All possible output columns are generated, with names/types + /// specified by model. + /// + /// The environment to use. + /// Model file path. + /// Optional GPU device ID to run execution on. Null for CPU. + /// If GPU error, raise exception or fallback to CPU. 
+ public OnnxScoringEstimator(IHostEnvironment env, string modelFile, int? gpuDeviceId = null, bool fallbackToCpu = false) + : this(env, new OnnxTransform(env, modelFile, new string[] { }, new string[] { }, gpuDeviceId, fallbackToCpu)) { } - public OnnxScoringEstimator(IHostEnvironment env, string modelFile, string[] inputs, string[] outputs) - : this(env, new OnnxTransform(env, modelFile, inputs, outputs)) + /// + /// Transform for scoring ONNX models. Input data column names/types must exactly match + /// all model input names. Only the output columns specified will be generated. + /// + /// The environment to use. + /// Model file path. + /// The name of the input data columns. Must match model's input names. + /// The output columns to generate. Names must match model specifications. Data types are inferred from model. + /// Optional GPU device ID to run execution on. Null for CPU. + /// If GPU error, raise exception or fallback to CPU. + public OnnxScoringEstimator(IHostEnvironment env, string modelFile, string[] inputColumns, string[] outputColumns, int? gpuDeviceId = null, bool fallbackToCpu = false) + : this(env, new OnnxTransform(env, modelFile, inputColumns, outputColumns, gpuDeviceId, fallbackToCpu)) { } diff --git a/src/Microsoft.ML.OnnxTransform/OnnxUtils.cs b/src/Microsoft.ML.OnnxTransform/OnnxUtils.cs index 2c18a495bf..68c6b11797 100644 --- a/src/Microsoft.ML.OnnxTransform/OnnxUtils.cs +++ b/src/Microsoft.ML.OnnxTransform/OnnxUtils.cs @@ -71,10 +71,36 @@ public OnnxNodeInfo(string name, OnnxShape shape, System.Type type) public readonly List InputNames; public readonly List OutputNames; - public OnnxModel(string modelFile) + /// + /// Constructs OnnxModel object from file. + /// + /// Model file path. + /// GPU device ID to execute on. Null for CPU. + /// If true, resumes CPU execution quitely upon GPU error. + public OnnxModel(string modelFile, int? gpuDeviceId = null, bool fallbackToCpu = false) { _modelFile = modelFile; - _session = new InferenceSession(modelFile); + + if (gpuDeviceId.HasValue) + { + try + { + _session = new InferenceSession(modelFile, SessionOptions.MakeSessionOptionWithCudaProvider(gpuDeviceId.Value)); + } + catch (OnnxRuntimeException) + { + if (fallbackToCpu) + _session = new InferenceSession(modelFile); + else + // if called from OnnxTranform, is caught and rethrown. + throw; + } + } + else + { + _session = new InferenceSession(modelFile); + } + ModelInfo = new OnnxModelInfo(GetInputsInfo(), GetOutputsInfo()); InputNames = ModelInfo.InputsInfo.Select(i => i.Name).ToList(); OutputNames = ModelInfo.OutputsInfo.Select(i => i.Name).ToList(); @@ -83,16 +109,28 @@ public OnnxModel(string modelFile) /// /// Create an OnnxModel from a byte[] /// - /// + /// Bytes of the serialized model /// OnnxModel public static OnnxModel CreateFromBytes(byte[] modelBytes) + { + return CreateFromBytes(modelBytes, null, false); + } + + /// + /// Create an OnnxModel from a byte[]. Set execution to GPU if required. + /// + /// Bytes of the serialized model. + /// GPU device ID to execute on. Null for CPU. + /// If true, resumes CPU execution quitely upon GPU error. + /// OnnxModel + public static OnnxModel CreateFromBytes(byte[] modelBytes, int? 
gpuDeviceId = null, bool fallbackToCpu = false) { var tempModelDir = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()); Directory.CreateDirectory(tempModelDir); var tempModelFile = Path.Combine(tempModelDir, "model.onnx"); File.WriteAllBytes(tempModelFile, modelBytes); - return new OnnxModel(tempModelFile); + return new OnnxModel(tempModelFile, gpuDeviceId, fallbackToCpu); // TODO: // tempModelFile is needed in case the model needs to be saved @@ -103,8 +141,8 @@ public static OnnxModel CreateFromBytes(byte[] modelBytes) /// /// Uses an open session to score a list of NamedOnnxValues. /// - /// The NamedOnnxValues to score - /// Resulting output NamedOnnxValues list + /// The NamedOnnxValues to score. + /// Resulting output NamedOnnxValues list. public IReadOnlyCollection Run(List inputNamedOnnxValues) { return _session.Run(inputNamedOnnxValues); @@ -170,9 +208,9 @@ internal sealed class OnnxUtils /// /// Creates a NamedOnnxValue from a scalar value. /// - /// The type of the Tensor contained in the NamedOnnxValue - /// The name of the NamedOnnxValue - /// The data values of the Tensor + /// The type of the Tensor contained in the NamedOnnxValue. + /// The name of the NamedOnnxValue. + /// The data values of the Tensor. /// NamedOnnxValue public static NamedOnnxValue CreateScalarNamedOnnxValue(string name, T data) { @@ -185,10 +223,10 @@ public static NamedOnnxValue CreateScalarNamedOnnxValue(string name, T data) /// Create a NamedOnnxValue from vbuffer span. Checks if the tensor type /// is supported by OnnxRuntime prior to execution. /// - /// The type of the Tensor contained in the NamedOnnxValue - /// The name of the NamedOnnxValue + /// The type of the Tensor contained in the NamedOnnxValue. + /// The name of the NamedOnnxValue. /// A span containing the data - /// The shape of the Tensor being created + /// The shape of the Tensor being created. /// NamedOnnxValue public static NamedOnnxValue CreateNamedOnnxValue(string name, ReadOnlySpan data, OnnxShape shape) { diff --git a/src/Microsoft.ML.PCA/PcaTransform.cs b/src/Microsoft.ML.PCA/PcaTransform.cs index eafe60eb97..ee2ce19a74 100644 --- a/src/Microsoft.ML.PCA/PcaTransform.cs +++ b/src/Microsoft.ML.PCA/PcaTransform.cs @@ -3,7 +3,6 @@ // See the LICENSE file in the project root for more information. using System; -using System.Collections.Generic; using System.Linq; using System.Text; using Microsoft.ML; @@ -15,8 +14,6 @@ using Microsoft.ML.Internal.Utilities; using Microsoft.ML.Model; using Microsoft.ML.Numeric; -using Microsoft.ML.StaticPipe; -using Microsoft.ML.StaticPipe.Runtime; using Microsoft.ML.Transforms.Projections; [assembly: LoadableClass(PcaTransform.Summary, typeof(IDataTransform), typeof(PcaTransform), typeof(PcaTransform.Arguments), typeof(SignatureDataTransform), @@ -662,6 +659,7 @@ public static CommonOutputs.TransformOutput Calculate(IHostEnvironment env, Argu /// public sealed class PrincipalComponentAnalysisEstimator : IEstimator { + [BestFriend] internal static class Defaults { public const string WeightColumn = null; @@ -722,64 +720,4 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema) return new SchemaShape(result.Values); } } - - public static class PcaEstimatorExtensions - { - private sealed class OutPipelineColumn : Vector - { - public readonly Vector Input; - - public OutPipelineColumn(Vector input, string weightColumn, int rank, - int overSampling, bool center, int? 
seed = null) - : base(new Reconciler(weightColumn, rank, overSampling, center, seed), input) - { - Input = input; - } - } - - private sealed class Reconciler : EstimatorReconciler - { - private readonly PcaTransform.ColumnInfo _colInfo; - - public Reconciler(string weightColumn, int rank, int overSampling, bool center, int? seed = null) - { - _colInfo = new PcaTransform.ColumnInfo( - null, null, weightColumn, rank, overSampling, center, seed); - } - - public override IEstimator Reconcile(IHostEnvironment env, - PipelineColumn[] toOutput, - IReadOnlyDictionary inputNames, - IReadOnlyDictionary outputNames, - IReadOnlyCollection usedNames) - { - Contracts.Assert(toOutput.Length == 1); - var outCol = (OutPipelineColumn)toOutput[0]; - var inputColName = inputNames[outCol.Input]; - var outputColName = outputNames[outCol]; - return new PrincipalComponentAnalysisEstimator(env, inputColName, outputColName, - _colInfo.WeightColumn, _colInfo.Rank, _colInfo.Oversampling, - _colInfo.Center, _colInfo.Seed); - } - } - - /// - /// Replaces the input vector with its projection to the principal component subspace, - /// which can significantly reduce size of vector. - /// - /// - /// The column to apply PCA to. - /// The name of the weight column. - /// The number of components in the PCA. - /// Oversampling parameter for randomized PCA training. - /// If enabled, data is centered to be zero mean. - /// The seed for random number generation - /// Vector containing the principal components. - public static Vector ToPrincipalComponents(this Vector input, - string weightColumn = PrincipalComponentAnalysisEstimator.Defaults.WeightColumn, - int rank = PrincipalComponentAnalysisEstimator.Defaults.Rank, - int overSampling = PrincipalComponentAnalysisEstimator.Defaults.Oversampling, - bool center = PrincipalComponentAnalysisEstimator.Defaults.Center, - int? seed = null) => new OutPipelineColumn(input, weightColumn, rank, overSampling, center, seed); - } } diff --git a/src/Microsoft.ML.PCA/Properties/AssemblyInfo.cs b/src/Microsoft.ML.PCA/Properties/AssemblyInfo.cs new file mode 100644 index 0000000000..443f2304db --- /dev/null +++ b/src/Microsoft.ML.PCA/Properties/AssemblyInfo.cs @@ -0,0 +1,10 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Runtime.CompilerServices; +using Microsoft.ML; + +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.StaticPipe" + PublicKey.Value)] + +[assembly: WantsToBeBestFriends] diff --git a/src/Microsoft.ML.Recommender/Properties/AssemblyInfo.cs b/src/Microsoft.ML.Recommender/Properties/AssemblyInfo.cs new file mode 100644 index 0000000000..4cfdbca7bb --- /dev/null +++ b/src/Microsoft.ML.Recommender/Properties/AssemblyInfo.cs @@ -0,0 +1,11 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.Runtime.CompilerServices; +using Microsoft.ML; + +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Tests" + PublicKey.TestValue)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.StaticPipe" + PublicKey.Value)] + +[assembly: WantsToBeBestFriends] diff --git a/src/Microsoft.ML.ResultProcessor/Properties/AssemblyInfo.cs b/src/Microsoft.ML.ResultProcessor/Properties/AssemblyInfo.cs new file mode 100644 index 0000000000..4affe0c29a --- /dev/null +++ b/src/Microsoft.ML.ResultProcessor/Properties/AssemblyInfo.cs @@ -0,0 +1,8 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Runtime.CompilerServices; +using Microsoft.ML; + +[assembly: InternalsVisibleTo("TLC" + InternalPublicKey.Value)] diff --git a/src/Microsoft.ML.ResultProcessor/ResultProcessor.cs b/src/Microsoft.ML.ResultProcessor/ResultProcessor.cs index 3cccec12f8..1635ad71c9 100644 --- a/src/Microsoft.ML.ResultProcessor/ResultProcessor.cs +++ b/src/Microsoft.ML.ResultProcessor/ResultProcessor.cs @@ -661,7 +661,7 @@ private static bool ValidateMamlOutput(string filename, string[] rawLines, out L }; } - private static bool ParseCommandArguments(IHostEnvironment env, string commandline, out object commandArgs, out ComponentCatalog.LoadableClassInfo commandClass, bool trimExe = true) + internal static bool ParseCommandArguments(IHostEnvironment env, string commandline, out object commandArgs, out ComponentCatalog.LoadableClassInfo commandClass, bool trimExe = true) { string args = commandline; if (trimExe) diff --git a/src/Microsoft.ML.StandardLearners/AssemblyInfo.cs b/src/Microsoft.ML.StandardLearners/AssemblyInfo.cs index a9aea50895..65b5ed14ea 100644 --- a/src/Microsoft.ML.StandardLearners/AssemblyInfo.cs +++ b/src/Microsoft.ML.StandardLearners/AssemblyInfo.cs @@ -6,7 +6,6 @@ using Microsoft.ML; [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.EntryPoints" + PublicKey.Value)] -[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Legacy" + PublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.LightGBM" + PublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.HalLearners" + PublicKey.Value)] diff --git a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FieldAwareFactorizationMachineUtils.cs b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FieldAwareFactorizationMachineUtils.cs index 0696ac0516..65ed095a10 100644 --- a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FieldAwareFactorizationMachineUtils.cs +++ b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FieldAwareFactorizationMachineUtils.cs @@ -75,8 +75,8 @@ public FieldAwareFactorizationMachineScalarRowMapper(IHostEnvironment env, RoleM Contracts.AssertValue(env); Contracts.AssertValue(schema); Contracts.CheckParam(outputSchema.Count == 2, nameof(outputSchema)); - Contracts.CheckParam(outputSchema[0].Type.IsNumber, nameof(outputSchema)); - Contracts.CheckParam(outputSchema[1].Type.IsNumber, nameof(outputSchema)); + Contracts.CheckParam(outputSchema[0].Type is NumberType, nameof(outputSchema)); + Contracts.CheckParam(outputSchema[1].Type is NumberType, nameof(outputSchema)); Contracts.AssertValue(pred); _env = env; diff --git a/src/Microsoft.ML.StandardLearners/Properties/AssemblyInfo.cs b/src/Microsoft.ML.StandardLearners/Properties/AssemblyInfo.cs new file mode 100644 index 0000000000..aae3561544 
--- /dev/null +++ b/src/Microsoft.ML.StandardLearners/Properties/AssemblyInfo.cs @@ -0,0 +1,10 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Runtime.CompilerServices; +using Microsoft.ML; + +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.StaticPipe" + PublicKey.Value)] + +[assembly: InternalsVisibleTo(assemblyName: "RunTests" + InternalPublicKey.Value)] diff --git a/src/Microsoft.ML.StandardLearners/Standard/LinearModelParameters.cs b/src/Microsoft.ML.StandardLearners/Standard/LinearModelParameters.cs index 8bf4438ad6..2c7286006c 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LinearModelParameters.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LinearModelParameters.cs @@ -84,10 +84,7 @@ public IEnumerator GetEnumerator() return _pred.Weight.Items(all: true).Select(iv => iv.Value).GetEnumerator(); } - IEnumerator IEnumerable.GetEnumerator() - { - return GetEnumerator(); - } + IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); } /// The predictor's feature weight coefficients. @@ -102,6 +99,10 @@ IEnumerator IEnumerable.GetEnumerator() bool ICanSaveOnnx.CanSaveOnnx(OnnxContext ctx) => true; + /// + /// Used to determine the contribution of each feature to the score of an example by . + /// For linear models, the contribution of a given feature is equal to the product of feature value times the corresponding weight. + /// public FeatureContributionCalculator FeatureContributionClaculator => new FeatureContributionCalculator(this); /// @@ -505,7 +506,7 @@ private protected override void SaveSummary(TextWriter writer, RoleMappedSchema writer.WriteLine(LinearPredictorUtils.LinearModelAsText("Linear Binary Classification Predictor", null, null, in weights, Bias, schema)); - _stats?.SaveText(writer, this, schema, 20); + _stats?.SaveText(writer, this, schema.Feature.Value, 20); } /// @@ -516,7 +517,7 @@ IList> ICanGetSummaryInKeyValuePairs.GetSummaryInKe var weights = Weight; List> results = new List>(); LinearPredictorUtils.SaveLinearModelWeightsInKeyValuePairs(in weights, Bias, schema, results); - _stats?.SaveSummaryInKeyValuePairs(this, schema, int.MaxValue, results); + _stats?.SaveSummaryInKeyValuePairs(this, schema.Feature.Value, int.MaxValue, results); return results; } diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs index 194780e59d..e62d354177 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs @@ -92,14 +92,15 @@ public abstract class ArgumentsBase : LearnerInputBaseWithWeight [Argument(ArgumentType.AtMostOnce, HelpText = "Enforce non-negative weights", ShortName = "nn", SortOrder = 90)] public bool EnforceNonNegativity = Defaults.EnforceNonNegativity; + [BestFriend] internal static class Defaults { - internal const float L2Weight = 1; - internal const float L1Weight = 1; - internal const float OptTol = 1e-7f; - internal const int MemorySize = 20; - internal const int MaxIterations = int.MaxValue; - internal const bool EnforceNonNegativity = false; + public const float L2Weight = 1; + public const float L1Weight = 1; + public const float OptTol = 1e-7f; + public const int MemorySize = 20; + public const int 
MaxIterations = int.MaxValue; + public const bool EnforceNonNegativity = false; } } diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs index 5d524f5393..b7c08b2370 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs @@ -783,7 +783,7 @@ void ICanSaveInTextFormat.SaveAsText(TextWriter writer, RoleMappedSchema schema) } if (_stats != null) - _stats.SaveText(writer, null, schema, 20); + _stats.SaveText(writer, null, schema.Feature.Value, 20); } /// diff --git a/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs b/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs index 09b6974b01..905dfd17ba 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs @@ -27,12 +27,12 @@ namespace Microsoft.ML.Learners public readonly struct CoefficientStatistics { public readonly string Name; - public readonly Single Estimate; - public readonly Single StandardError; - public readonly Single ZScore; - public readonly Single PValue; + public readonly float Estimate; + public readonly float StandardError; + public readonly float ZScore; + public readonly float PValue; - public CoefficientStatistics(string name, Single estimate, Single stdError, Single zScore, Single pValue) + public CoefficientStatistics(string name, float estimate, float stdError, float zScore, float pValue) { Contracts.AssertNonEmpty(name); Name = name; @@ -69,10 +69,10 @@ private static VersionInfo GetVersionInfo() private readonly long _trainingExampleCount; // The deviance of this model. - private readonly Single _deviance; + private readonly float _deviance; // The deviance of the null hypothesis. - private readonly Single _nullDeviance; + private readonly float _nullDeviance; // Total count of parameters. private readonly int _paramCount; @@ -82,17 +82,17 @@ private static VersionInfo GetVersionInfo() // It could be null when there are too many non-zero weights so that // the memory is insufficient to hold the Hessian matrix necessary for the computation // of the variance-covariance matrix. - private readonly VBuffer? _coeffStdError; + private readonly VBuffer? 
_coeffStdError; public long TrainingExampleCount => _trainingExampleCount; - public Single Deviance => _deviance; + public float Deviance => _deviance; - public Single NullDeviance => _nullDeviance; + public float NullDeviance => _nullDeviance; public int ParametersCount => _paramCount; - internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, int paramCount, Single deviance, Single nullDeviance) + internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, int paramCount, float deviance, float nullDeviance) { Contracts.AssertValue(env); env.Assert(trainingExampleCount > 0); @@ -104,7 +104,7 @@ internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, _nullDeviance = nullDeviance; } - internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, int paramCount, Single deviance, Single nullDeviance, in VBuffer coeffStdError) + internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, int paramCount, float deviance, float nullDeviance, in VBuffer coeffStdError) : this(env, trainingExampleCount, paramCount, deviance, nullDeviance) { _env.Assert(coeffStdError.GetValues().Length == _paramCount); @@ -120,10 +120,10 @@ internal LinearModelStatistics(IHostEnvironment env, ModelLoadContext ctx) // *** Binary Format *** // int: count of parameters // long: count of training examples - // Single: deviance - // Single: null deviance + // float: deviance + // float: null deviance // bool: whether standard error is included - // (Conditional) Single[_paramCount]: values of std errors of coefficients + // (Conditional) float[_paramCount]: values of std errors of coefficients // (Conditional) int: length of std errors of coefficients // (Conditional) int[_paramCount]: indices of std errors of coefficients @@ -143,18 +143,18 @@ internal LinearModelStatistics(IHostEnvironment env, ModelLoadContext ctx) return; } - Single[] stdErrorValues = ctx.Reader.ReadFloatArray(_paramCount); + float[] stdErrorValues = ctx.Reader.ReadFloatArray(_paramCount); int length = ctx.Reader.ReadInt32(); _env.CheckDecode(length >= _paramCount); if (length == _paramCount) { - _coeffStdError = new VBuffer(length, stdErrorValues); + _coeffStdError = new VBuffer(length, stdErrorValues); return; } _env.Assert(length > _paramCount); int[] stdErrorIndices = ctx.Reader.ReadIntArray(_paramCount); - _coeffStdError = new VBuffer(length, _paramCount, stdErrorValues, stdErrorIndices); + _coeffStdError = new VBuffer(length, _paramCount, stdErrorValues, stdErrorIndices); } internal static LinearModelStatistics Create(IHostEnvironment env, ModelLoadContext ctx) @@ -178,10 +178,10 @@ private void SaveCore(ModelSaveContext ctx) // *** Binary Format *** // int: count of parameters // long: count of training examples - // Single: deviance - // Single: null deviance + // float: deviance + // float: null deviance // bool: whether standard error is included - // (Conditional) Single[_paramCount]: values of std errors of coefficients + // (Conditional) float[_paramCount]: values of std errors of coefficients // (Conditional) int: length of std errors of coefficients // (Conditional) int[_paramCount]: indices of std errors of coefficients @@ -212,7 +212,7 @@ private void SaveCore(ModelSaveContext ctx) /// /// Computes the standart deviation, Z-Score and p-Value. 
/// - public static bool TryGetBiasStatistics(LinearModelStatistics stats, Single bias, out Single stdError, out Single zScore, out Single pValue) + public static bool TryGetBiasStatistics(LinearModelStatistics stats, float bias, out float stdError, out float zScore, out float pValue) { if (!stats._coeffStdError.HasValue) { @@ -226,12 +226,12 @@ public static bool TryGetBiasStatistics(LinearModelStatistics stats, Single bias stdError = stats._coeffStdError.Value.GetValues()[0]; Contracts.Assert(stdError == stats._coeffStdError.Value.GetItemOrDefault(0)); zScore = bias / stdError; - pValue = 1.0f - (Single)ProbabilityFunctions.Erf(Math.Abs(zScore / sqrt2)); + pValue = 1.0f - (float)ProbabilityFunctions.Erf(Math.Abs(zScore / sqrt2)); return true; } - private static void GetUnorderedCoefficientStatistics(LinearModelStatistics stats, in VBuffer weights, in VBuffer> names, - ref VBuffer estimate, ref VBuffer stdErr, ref VBuffer zScore, ref VBuffer pValue, out ValueGetter>> getSlotNames) + private static void GetUnorderedCoefficientStatistics(LinearModelStatistics stats, in VBuffer weights, in VBuffer> names, + ref VBuffer estimate, ref VBuffer stdErr, ref VBuffer zScore, ref VBuffer pValue, out ValueGetter>> getSlotNames) { if (!stats._coeffStdError.HasValue) { @@ -260,7 +260,7 @@ private static void GetUnorderedCoefficientStatistics(LinearModelStatistics stat var weight = estimateEditor.Values[i - 1] = weights.GetItemOrDefault(wi); var stdError = stdErrorEditor.Values[wi] = coeffStdErrorValues[i]; zScoreEditor.Values[i - 1] = weight / stdError; - pValueEditor.Values[i - 1] = 1 - (Single)ProbabilityFunctions.Erf(Math.Abs(zScoreEditor.Values[i - 1] / sqrt2)); + pValueEditor.Values[i - 1] = 1 - (float)ProbabilityFunctions.Erf(Math.Abs(zScoreEditor.Values[i - 1] / sqrt2)); } estimate = estimateEditor.Commit(); @@ -283,7 +283,7 @@ private static void GetUnorderedCoefficientStatistics(LinearModelStatistics stat }; } - private List GetUnorderedCoefficientStatistics(LinearBinaryModelParameters parent, RoleMappedSchema schema) + private List GetUnorderedCoefficientStatistics(LinearBinaryModelParameters parent, Schema.Column featureColumn) { Contracts.AssertValue(_env); _env.CheckValue(parent, nameof(parent)); @@ -291,12 +291,14 @@ private List GetUnorderedCoefficientStatistics(LinearBina if (!_coeffStdError.HasValue) return new List(); - var weights = parent.Weights as IReadOnlyList; + var weights = parent.Weights as IReadOnlyList; _env.Assert(_paramCount == 1 || weights != null); _env.Assert(_coeffStdError.Value.Length == weights.Count + 1); var names = default(VBuffer>); - MetadataUtils.GetSlotNames(schema, RoleMappedSchema.ColumnRole.Feature, weights.Count, ref names); + + featureColumn.Metadata.GetValue(MetadataUtils.Kinds.SlotNames, ref names); + _env.Assert(names.Length > 0, "FeatureColumn has no metadata."); ReadOnlySpan stdErrorValues = _coeffStdError.Value.GetValues(); const Double sqrt2 = 1.41421356237; // Math.Sqrt(2); @@ -304,7 +306,7 @@ private List GetUnorderedCoefficientStatistics(LinearBina List result = new List(_paramCount - 1); bool denseStdError = _coeffStdError.Value.IsDense; ReadOnlySpan stdErrorIndices = _coeffStdError.Value.GetIndices(); - Single[] zScores = new Single[_paramCount - 1]; + float[] zScores = new float[_paramCount - 1]; for (int i = 1; i < _paramCount; i++) { int wi = denseStdError ? 
i - 1 : stdErrorIndices[i] - 1; @@ -315,7 +317,7 @@ private List GetUnorderedCoefficientStatistics(LinearBina var weight = weights[wi]; var stdError = stdErrorValues[i]; var zScore = zScores[i - 1] = weight / stdError; - var pValue = 1 - (Single)ProbabilityFunctions.Erf(Math.Abs(zScore / sqrt2)); + var pValue = 1 - (float)ProbabilityFunctions.Erf(Math.Abs(zScore / sqrt2)); result.Add(new CoefficientStatistics(name, weight, stdError, zScore, pValue)); } return result; @@ -324,33 +326,31 @@ private List GetUnorderedCoefficientStatistics(LinearBina /// /// Gets the coefficient statistics as an object. /// - internal CoefficientStatistics[] GetCoefficientStatistics(LinearBinaryModelParameters parent, RoleMappedSchema schema, int paramCountCap) + public CoefficientStatistics[] GetCoefficientStatistics(LinearBinaryModelParameters parent, Schema.Column featureColumn, int paramCountCap) { Contracts.AssertValue(_env); _env.CheckValue(parent, nameof(parent)); - _env.CheckValue(schema, nameof(schema)); _env.CheckParam(paramCountCap >= 0, nameof(paramCountCap)); if (paramCountCap > _paramCount) paramCountCap = _paramCount; - Single stdError; - Single zScore; - Single pValue; + float stdError; + float zScore; + float pValue; var bias = parent.Bias; if (!TryGetBiasStatistics(parent.Statistics, bias, out stdError, out zScore, out pValue)) return null; - var order = GetUnorderedCoefficientStatistics(parent, schema).OrderByDescending(stat => stat.ZScore).Take(paramCountCap - 1); + var order = GetUnorderedCoefficientStatistics(parent, featureColumn).OrderByDescending(stat => stat.ZScore).Take(paramCountCap - 1); return order.Prepend(new[] { new CoefficientStatistics("(Bias)", bias, stdError, zScore, pValue) }).ToArray(); } - internal void SaveText(TextWriter writer, LinearBinaryModelParameters parent, RoleMappedSchema schema, int paramCountCap) + internal void SaveText(TextWriter writer, LinearBinaryModelParameters parent, Schema.Column featureColumn, int paramCountCap) { Contracts.AssertValue(_env); _env.CheckValue(writer, nameof(writer)); _env.AssertValueOrNull(parent); - _env.AssertValueOrNull(schema); writer.WriteLine(); writer.WriteLine("*** MODEL STATISTICS SUMMARY *** "); writer.WriteLine("Count of training examples:\t{0}", _trainingExampleCount); @@ -361,7 +361,7 @@ internal void SaveText(TextWriter writer, LinearBinaryModelParameters parent, Ro if (parent == null) return; - var coeffStats = GetCoefficientStatistics(parent, schema, paramCountCap); + var coeffStats = GetCoefficientStatistics(parent, featureColumn, paramCountCap); if (coeffStats == null) return; @@ -387,7 +387,7 @@ internal void SaveText(TextWriter writer, LinearBinaryModelParameters parent, Ro /// Support method for linear models and . 
/// internal void SaveSummaryInKeyValuePairs(LinearBinaryModelParameters parent, - RoleMappedSchema schema, int paramCountCap, List> resultCollection) + Schema.Column featureColumn, int paramCountCap, List> resultCollection) { Contracts.AssertValue(_env); _env.AssertValue(resultCollection); @@ -400,7 +400,7 @@ internal void SaveSummaryInKeyValuePairs(LinearBinaryModelParameters parent, if (parent == null) return; - var coeffStats = GetCoefficientStatistics(parent, schema, paramCountCap); + var coeffStats = GetCoefficientStatistics(parent, featureColumn, paramCountCap); if (coeffStats == null) return; @@ -408,7 +408,7 @@ internal void SaveSummaryInKeyValuePairs(LinearBinaryModelParameters parent, { resultCollection.Add(new KeyValuePair( coeffStat.Name, - new Single[] { coeffStat.Estimate, coeffStat.StandardError, coeffStat.ZScore, coeffStat.PValue })); + new float[] { coeffStat.Estimate, coeffStat.StandardError, coeffStat.ZScore, coeffStat.PValue })); } } @@ -458,7 +458,7 @@ internal Schema.Metadata MakeStatisticsMetadata(LinearBinaryModelParameters pare return builder.GetMetadata(); } - private string DecorateProbabilityString(Single probZ) + private string DecorateProbabilityString(float probZ) { Contracts.AssertValue(_env); _env.Assert(0 <= probZ && probZ <= 1); diff --git a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/doc.xml b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/doc.xml index 3f352e1218..6cb6e2a24a 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/doc.xml +++ b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/doc.xml @@ -14,10 +14,6 @@ This multi-class trainer accepts binary feature values of type float, i.e., feature values are either true or false. Specifically a feature value greater than zero is treated as true. - - - - @@ -47,10 +43,6 @@ request caching, as it will be performing multiple passes over the data set. These learner will request normalization from the data pipeline if the classifier indicates it would benefit from it. 
- - - - pipeline.Add(OneVersusAll.With(new StochasticDualCoordinateAscentBinaryClassifier())); diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs index ebcc3742e5..c144c5c90b 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs @@ -52,11 +52,12 @@ public abstract class AveragedLinearArguments : OnlineLinearArguments [Argument(ArgumentType.AtMostOnce, HelpText = "The inexactness tolerance for averaging", ShortName = "avgtol")] public Float AveragedTolerance = (Float)1e-2; + [BestFriend] internal class AveragedDefaultArgs : OnlineDefaultArgs { - internal const Float LearningRate = 1; - internal const bool DecreaseLearningRate = false; - internal const Float L2RegularizerWeight = 0; + public const Float LearningRate = 1; + public const bool DecreaseLearningRate = false; + public const Float L2RegularizerWeight = 0; } internal abstract IComponentFactory LossFunctionFactory { get; } diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs index 6df0bde35d..f65aeb4f2b 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs @@ -170,7 +170,7 @@ protected override void CheckLabelCompatible(SchemaShape.Column labelCol) if (labelCol.Kind != SchemaShape.Column.VectorKind.Scalar) error(); - if (!labelCol.IsKey && labelCol.ItemType != NumberType.R4 && labelCol.ItemType != NumberType.R8 && !labelCol.ItemType.IsBool) + if (!labelCol.IsKey && labelCol.ItemType != NumberType.R4 && labelCol.ItemType != NumberType.R8 && !(labelCol.ItemType is BoolType)) error(); } diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs index dd3395f3ee..769ea0b151 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs @@ -52,10 +52,11 @@ public Arguments() internal override IComponentFactory LossFunctionFactory => LossFunction; + [BestFriend] internal class OgdDefaultArgs : AveragedDefaultArgs { - internal new const float LearningRate = 0.1f; - internal new const bool DecreaseLearningRate = true; + public new const float LearningRate = 0.1f; + public new const bool DecreaseLearningRate = true; } } diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs index 89de8a964f..68932db845 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs @@ -41,9 +41,10 @@ public abstract class OnlineLinearArguments : LearnerInputBaseWithLabel [Argument(ArgumentType.AtMostOnce, HelpText = "Size of cache when trained in Scope", ShortName = "cache")] public int StreamingCacheSize = 1000000; + [BestFriend] internal class OnlineDefaultArgs { - internal const int NumIterations = 1; + public const int NumIterations = 1; } } diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs index 24feb7fb40..84cba339f6 100644 --- 
a/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs @@ -1551,7 +1551,7 @@ protected override void CheckLabelCompatible(SchemaShape.Column labelCol) if (labelCol.Kind != SchemaShape.Column.VectorKind.Scalar) error(); - if (!labelCol.IsKey && labelCol.ItemType != NumberType.R4 && labelCol.ItemType != NumberType.R8 && !labelCol.ItemType.IsBool) + if (!labelCol.IsKey && labelCol.ItemType != NumberType.R4 && labelCol.ItemType != NumberType.R8 && !(labelCol.ItemType is BoolType)) error(); } @@ -1658,11 +1658,13 @@ internal void Check(IHostEnvironment env) if (ConvergenceTolerance <= 0) ConvergenceTolerance = float.Epsilon; } + + [BestFriend] internal static class Defaults { - internal const float L2Weight = 1e-6f; - internal const int MaxIterations = 20; - internal const double InitLearningRate = 0.01; + public const float L2Weight = 1e-6f; + public const int MaxIterations = 20; + public const double InitLearningRate = 0.01; } } diff --git a/src/Microsoft.ML.Data/StaticPipe/Attributes.cs b/src/Microsoft.ML.StaticPipe/Attributes.cs similarity index 100% rename from src/Microsoft.ML.Data/StaticPipe/Attributes.cs rename to src/Microsoft.ML.StaticPipe/Attributes.cs diff --git a/src/Microsoft.ML.Data/StaticPipe/DataLoadSaveOperationsExtensions.cs b/src/Microsoft.ML.StaticPipe/DataLoadSaveOperationsExtensions.cs similarity index 98% rename from src/Microsoft.ML.Data/StaticPipe/DataLoadSaveOperationsExtensions.cs rename to src/Microsoft.ML.StaticPipe/DataLoadSaveOperationsExtensions.cs index db30ecd454..b63ad6f461 100644 --- a/src/Microsoft.ML.Data/StaticPipe/DataLoadSaveOperationsExtensions.cs +++ b/src/Microsoft.ML.StaticPipe/DataLoadSaveOperationsExtensions.cs @@ -4,7 +4,7 @@ using System; using Microsoft.ML.Data; -using static Microsoft.ML.Data.TextLoader; +using static Microsoft.ML.StaticPipe.TextLoaderStatic; namespace Microsoft.ML.StaticPipe { diff --git a/src/Microsoft.ML.Data/StaticPipe/DataReader.cs b/src/Microsoft.ML.StaticPipe/DataReader.cs similarity index 100% rename from src/Microsoft.ML.Data/StaticPipe/DataReader.cs rename to src/Microsoft.ML.StaticPipe/DataReader.cs diff --git a/src/Microsoft.ML.Data/StaticPipe/DataReaderEstimator.cs b/src/Microsoft.ML.StaticPipe/DataReaderEstimator.cs similarity index 100% rename from src/Microsoft.ML.Data/StaticPipe/DataReaderEstimator.cs rename to src/Microsoft.ML.StaticPipe/DataReaderEstimator.cs diff --git a/src/Microsoft.ML.Data/StaticPipe/DataView.cs b/src/Microsoft.ML.StaticPipe/DataView.cs similarity index 100% rename from src/Microsoft.ML.Data/StaticPipe/DataView.cs rename to src/Microsoft.ML.StaticPipe/DataView.cs diff --git a/src/Microsoft.ML.Data/StaticPipe/Estimator.cs b/src/Microsoft.ML.StaticPipe/Estimator.cs similarity index 100% rename from src/Microsoft.ML.Data/StaticPipe/Estimator.cs rename to src/Microsoft.ML.StaticPipe/Estimator.cs diff --git a/src/Microsoft.ML.Data/Evaluators/EvaluatorStaticExtensions.cs b/src/Microsoft.ML.StaticPipe/EvaluatorStaticExtensions.cs similarity index 99% rename from src/Microsoft.ML.Data/Evaluators/EvaluatorStaticExtensions.cs rename to src/Microsoft.ML.StaticPipe/EvaluatorStaticExtensions.cs index 8d66a8a53c..2712318c81 100644 --- a/src/Microsoft.ML.Data/Evaluators/EvaluatorStaticExtensions.cs +++ b/src/Microsoft.ML.StaticPipe/EvaluatorStaticExtensions.cs @@ -3,10 +3,10 @@ // See the LICENSE file in the project root for more information. 
using System; -using Microsoft.ML.StaticPipe; +using Microsoft.ML.Data; using Microsoft.ML.StaticPipe.Runtime; -namespace Microsoft.ML.Data +namespace Microsoft.ML.StaticPipe { /// /// Extension methods for evaluation. diff --git a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineStatic.cs b/src/Microsoft.ML.StaticPipe/FactorizationMachineStatic.cs similarity index 100% rename from src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineStatic.cs rename to src/Microsoft.ML.StaticPipe/FactorizationMachineStatic.cs diff --git a/src/Microsoft.ML.ImageAnalytics/ImageStaticPipe.cs b/src/Microsoft.ML.StaticPipe/ImageStaticPipe.cs similarity index 91% rename from src/Microsoft.ML.ImageAnalytics/ImageStaticPipe.cs rename to src/Microsoft.ML.StaticPipe/ImageStaticPipe.cs index aaf99431ee..f23bf61f99 100644 --- a/src/Microsoft.ML.ImageAnalytics/ImageStaticPipe.cs +++ b/src/Microsoft.ML.StaticPipe/ImageStaticPipe.cs @@ -4,9 +4,9 @@ using System; using System.Drawing; -using Microsoft.ML.StaticPipe; +using Microsoft.ML.ImageAnalytics; -namespace Microsoft.ML.ImageAnalytics +namespace Microsoft.ML.StaticPipe { /// /// A type used in the generic argument to . We must simultaneously distinguish @@ -33,7 +33,7 @@ public static Custom LoadAsImage(this Scalar path, st { Contracts.CheckValue(path, nameof(path)); Contracts.CheckValueOrNull(relativeTo); - return new ImageLoadingEstimator.OutPipelineColumn(path, relativeTo); + return new ImageLoadingStaticExtensions.OutPipelineColumn(path, relativeTo); } /// @@ -45,7 +45,7 @@ public static Custom LoadAsImage(this Scalar path, st public static Custom AsGrayscale(this Custom input) { Contracts.CheckValue(input, nameof(input)); - return new ImageGrayscalingEstimator.OutPipelineColumn(input); + return new ImageGreyScalingStaticExtensions.OutPipelineColumn(input); } /// @@ -57,7 +57,7 @@ public static Custom AsGrayscale(this Custom AsGrayscale(this Custom input) { Contracts.CheckValue(input, nameof(input)); - return new ImageGrayscalingEstimator.OutPipelineColumn(input); + return new ImageGreyScalingStaticExtensions.OutPipelineColumn(input); } /// @@ -80,7 +80,7 @@ public static Custom Resize(this Custom input, int wi Contracts.CheckParam(Enum.IsDefined(typeof(ImageResizerTransform.ResizingKind), resizing), nameof(resizing), "Undefined value detected"); Contracts.CheckParam(Enum.IsDefined(typeof(ImageResizerTransform.Anchor), cropAnchor), nameof(cropAnchor), "Undefined value detected"); - return new ImageResizingEstimator.OutPipelineColumn(input, width, height, resizing, cropAnchor); + return new ImageResizingStaticExtensions.OutPipelineColumn(input, width, height, resizing, cropAnchor); } /// @@ -103,7 +103,7 @@ public static Custom Resize(this Custom input, int width, int he Contracts.CheckParam(Enum.IsDefined(typeof(ImageResizerTransform.ResizingKind), resizing), nameof(resizing), "Undefined value detected"); Contracts.CheckParam(Enum.IsDefined(typeof(ImageResizerTransform.Anchor), cropAnchor), nameof(cropAnchor), "Undefined value detected"); - return new ImageResizingEstimator.OutPipelineColumn(input, width, height, resizing, cropAnchor); + return new ImageResizingStaticExtensions.OutPipelineColumn(input, width, height, resizing, cropAnchor); } /// @@ -135,7 +135,7 @@ public static Vector ExtractPixels(this Custom input, bool useAlp Offset = offset, Convert = true }; - return new ImagePixelExtractingEstimator.OutPipelineColumn(input, colParams); + return new 
ImagePixelExtractingStaticExtensions.OutPipelineColumn(input, colParams); } /// @@ -163,7 +163,7 @@ public static Vector ExtractPixelsAsBytes(this Custom input, bool InterleaveArgb = interleaveArgb, Convert = false }; - return new ImagePixelExtractingEstimator.OutPipelineColumn(input, colParams); + return new ImagePixelExtractingStaticExtensions.OutPipelineColumn(input, colParams); } } } diff --git a/src/Microsoft.ML.StaticPipe/ImageTransformsStatic.cs b/src/Microsoft.ML.StaticPipe/ImageTransformsStatic.cs new file mode 100644 index 0000000000..61fe2eee49 --- /dev/null +++ b/src/Microsoft.ML.StaticPipe/ImageTransformsStatic.cs @@ -0,0 +1,246 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.Drawing; +using Microsoft.ML.Core.Data; +using Microsoft.ML.ImageAnalytics; +using Microsoft.ML.StaticPipe.Runtime; + +namespace Microsoft.ML.StaticPipe +{ + public static class ImageLoadingStaticExtensions + { + internal sealed class OutPipelineColumn : Custom + { + private readonly Scalar _input; + + public OutPipelineColumn(Scalar path, string relativeTo) + : base(new Reconciler(relativeTo), path) + { + Contracts.AssertValue(path); + _input = path; + } + + /// + /// Reconciler to an for the . + /// + /// + /// We must create a new reconciler per call, because the relative path of + /// is considered a transform-wide option, as it is not specified in . However, we still + /// implement so the analyzer can still equate two of these things if they happen to share the same + /// path, so we can be a bit more efficient with respect to our estimator declarations. + /// + /// + private sealed class Reconciler : EstimatorReconciler, IEquatable + { + private readonly string _relTo; + + public Reconciler(string relativeTo) + { + Contracts.AssertValueOrNull(relativeTo); + _relTo = relativeTo; + } + + public bool Equals(Reconciler other) + => other != null && other._relTo == _relTo; + + public override bool Equals(object obj) + => obj is Reconciler other && Equals(other); + + public override int GetHashCode() + => _relTo?.GetHashCode() ?? 0; + + public override IEstimator Reconcile(IHostEnvironment env, + PipelineColumn[] toOutput, + IReadOnlyDictionary inputNames, + IReadOnlyDictionary outputNames, + IReadOnlyCollection usedNames) + { + var cols = new (string input, string output)[toOutput.Length]; + for (int i = 0; i < toOutput.Length; ++i) + { + var outCol = (OutPipelineColumn)toOutput[i]; + cols[i] = (inputNames[outCol._input], outputNames[outCol]); + } + return new ImageLoadingEstimator(env, _relTo, cols); + } + } + } + } + + public static class ImageGreyScalingStaticExtensions + { + private interface IColInput + { + PipelineColumn Input { get; } + } + + internal sealed class OutPipelineColumn : Custom, IColInput + { + public PipelineColumn Input { get; } + + public OutPipelineColumn(Custom input) + : base(Reconciler.Inst, input) + { + Contracts.AssertValue(input); + Contracts.Assert(typeof(T) == typeof(Bitmap) || typeof(T) == typeof(UnknownSizeBitmap)); + Input = input; + } + } + + /// + /// Reconciler to an for the . 
+ /// + /// Because we want to use the same reconciler for + /// + /// + private sealed class Reconciler : EstimatorReconciler + { + public static Reconciler Inst = new Reconciler(); + + private Reconciler() { } + + public override IEstimator Reconcile(IHostEnvironment env, + PipelineColumn[] toOutput, + IReadOnlyDictionary inputNames, + IReadOnlyDictionary outputNames, + IReadOnlyCollection usedNames) + { + var cols = new (string input, string output)[toOutput.Length]; + for (int i = 0; i < toOutput.Length; ++i) + { + var outCol = (IColInput)toOutput[i]; + cols[i] = (inputNames[outCol.Input], outputNames[toOutput[i]]); + } + return new ImageGrayscalingEstimator(env, cols); + } + } + } + + public static class ImageResizingStaticExtensions + { + internal sealed class OutPipelineColumn : Custom + { + private readonly PipelineColumn _input; + private readonly int _width; + private readonly int _height; + private readonly ImageResizerTransform.ResizingKind _resizing; + private readonly ImageResizerTransform.Anchor _cropAnchor; + + public OutPipelineColumn(PipelineColumn input, int width, int height, + ImageResizerTransform.ResizingKind resizing, ImageResizerTransform.Anchor cropAnchor) + : base(Reconciler.Inst, input) + { + Contracts.AssertValue(input); + _input = input; + _width = width; + _height = height; + _resizing = resizing; + _cropAnchor = cropAnchor; + } + + private ImageResizerTransform.ColumnInfo MakeColumnInfo(string input, string output) + => new ImageResizerTransform.ColumnInfo(input, output, _width, _height, _resizing, _cropAnchor); + + /// + /// Reconciler to an for the . + /// + /// + /// + private sealed class Reconciler : EstimatorReconciler + { + public static Reconciler Inst = new Reconciler(); + + private Reconciler() + { + } + + public override IEstimator Reconcile(IHostEnvironment env, + PipelineColumn[] toOutput, + IReadOnlyDictionary inputNames, + IReadOnlyDictionary outputNames, + IReadOnlyCollection usedNames) + { + var cols = new ImageResizerTransform.ColumnInfo[toOutput.Length]; + for (int i = 0; i < toOutput.Length; ++i) + { + var outCol = (OutPipelineColumn)toOutput[i]; + cols[i] = outCol.MakeColumnInfo(inputNames[outCol._input], outputNames[outCol]); + } + return new ImageResizingEstimator(env, cols); + } + } + } + } + + public static class ImagePixelExtractingStaticExtensions + { + private interface IColInput + { + Custom Input { get; } + + ImagePixelExtractorTransform.ColumnInfo MakeColumnInfo(string input, string output); + } + + internal sealed class OutPipelineColumn : Vector, IColInput + { + public Custom Input { get; } + private static readonly ImagePixelExtractorTransform.Arguments _defaultArgs = new ImagePixelExtractorTransform.Arguments(); + private readonly ImagePixelExtractorTransform.Column _colParam; + + public OutPipelineColumn(Custom input, ImagePixelExtractorTransform.Column col) + : base(Reconciler.Inst, input) + { + Contracts.AssertValue(input); + Contracts.Assert(typeof(T) == typeof(float) || typeof(T) == typeof(byte)); + Input = input; + _colParam = col; + } + + public ImagePixelExtractorTransform.ColumnInfo MakeColumnInfo(string input, string output) + { + // In principle, the analyzer should only call the the reconciler once for these columns. + Contracts.Assert(_colParam.Source == null); + Contracts.Assert(_colParam.Name == null); + + _colParam.Name = output; + _colParam.Source = input; + return new ImagePixelExtractorTransform.ColumnInfo(_colParam, _defaultArgs); + } + } + + /// + /// Reconciler to an for the . 
+ /// + /// Because we want to use the same reconciler for + /// + /// + private sealed class Reconciler : EstimatorReconciler + { + /// + /// Because there are no global settings that cannot be overridden, we can always just use the same reconciler. + /// + public static Reconciler Inst = new Reconciler(); + + private Reconciler() { } + + public override IEstimator Reconcile(IHostEnvironment env, + PipelineColumn[] toOutput, + IReadOnlyDictionary inputNames, + IReadOnlyDictionary outputNames, + IReadOnlyCollection usedNames) + { + var cols = new ImagePixelExtractorTransform.ColumnInfo[toOutput.Length]; + for (int i = 0; i < toOutput.Length; ++i) + { + var outCol = (IColInput)toOutput[i]; + cols[i] = outCol.MakeColumnInfo(inputNames[outCol.Input], outputNames[toOutput[i]]); + } + return new ImagePixelExtractingEstimator(env, cols); + } + } + } +} diff --git a/src/Microsoft.ML.KMeansClustering/KMeansStatic.cs b/src/Microsoft.ML.StaticPipe/KMeansStatic.cs similarity index 100% rename from src/Microsoft.ML.KMeansClustering/KMeansStatic.cs rename to src/Microsoft.ML.StaticPipe/KMeansStatic.cs diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsStatic.cs b/src/Microsoft.ML.StaticPipe/LbfgsStatic.cs similarity index 99% rename from src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsStatic.cs rename to src/Microsoft.ML.StaticPipe/LbfgsStatic.cs index 4ee64f9ff6..f8a8fb9b2e 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsStatic.cs +++ b/src/Microsoft.ML.StaticPipe/LbfgsStatic.cs @@ -15,7 +15,7 @@ namespace Microsoft.ML.StaticPipe /// /// Binary Classification trainer estimators. /// - public static class LbfgsBinaryClassificationExtensions + public static class LbfgsBinaryClassificationStaticExtensions { /// /// Predict a target using a linear binary classification model trained with the trainer. diff --git a/src/Microsoft.ML.Recommender/MatrixFactorizationStatic.cs b/src/Microsoft.ML.StaticPipe/MatrixFactorizationStatic.cs similarity index 100% rename from src/Microsoft.ML.Recommender/MatrixFactorizationStatic.cs rename to src/Microsoft.ML.StaticPipe/MatrixFactorizationStatic.cs diff --git a/src/Microsoft.ML.StaticPipe/Microsoft.ML.StaticPipe.csproj b/src/Microsoft.ML.StaticPipe/Microsoft.ML.StaticPipe.csproj index 61888ee627..c97abb5357 100644 --- a/src/Microsoft.ML.StaticPipe/Microsoft.ML.StaticPipe.csproj +++ b/src/Microsoft.ML.StaticPipe/Microsoft.ML.StaticPipe.csproj @@ -1,11 +1,16 @@ - + netstandard2.0 + Microsoft.ML.StaticPipe - + + + + + diff --git a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesStatic.cs b/src/Microsoft.ML.StaticPipe/MultiClassNaiveBayesStatic.cs similarity index 94% rename from src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesStatic.cs rename to src/Microsoft.ML.StaticPipe/MultiClassNaiveBayesStatic.cs index 172728c4c2..d730c650a2 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesStatic.cs +++ b/src/Microsoft.ML.StaticPipe/MultiClassNaiveBayesStatic.cs @@ -3,15 +3,15 @@ // See the LICENSE file in the project root for more information. using System; -using Microsoft.ML.StaticPipe; using Microsoft.ML.StaticPipe.Runtime; +using Microsoft.ML.Trainers; -namespace Microsoft.ML.Trainers +namespace Microsoft.ML.StaticPipe { /// /// MultiClass Classification trainer estimators. 
/// - public static partial class MultiClassClassificationTrainers + public static partial class MultiClassClassificationStaticExtensions { /// /// Predict a target using a linear multiclass classification model trained with the trainer. diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLearnerStatic.cs b/src/Microsoft.ML.StaticPipe/OnlineLearnerStatic.cs similarity index 99% rename from src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLearnerStatic.cs rename to src/Microsoft.ML.StaticPipe/OnlineLearnerStatic.cs index bdfd2c88db..aa120b1b75 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLearnerStatic.cs +++ b/src/Microsoft.ML.StaticPipe/OnlineLearnerStatic.cs @@ -12,7 +12,7 @@ namespace Microsoft.ML.StaticPipe /// /// Binary Classification trainer estimators. /// - public static class AveragedPerceptronExtensions + public static class AveragedPerceptronStaticExtensions { /// /// Predict a target using a linear binary classification model trained with the AveragedPerceptron trainer, and a custom loss. diff --git a/src/Microsoft.ML.Data/StaticPipe/PipelineColumn.cs b/src/Microsoft.ML.StaticPipe/PipelineColumn.cs similarity index 100% rename from src/Microsoft.ML.Data/StaticPipe/PipelineColumn.cs rename to src/Microsoft.ML.StaticPipe/PipelineColumn.cs diff --git a/src/Microsoft.ML.Data/StaticPipe/Reconciler.cs b/src/Microsoft.ML.StaticPipe/Reconciler.cs similarity index 100% rename from src/Microsoft.ML.Data/StaticPipe/Reconciler.cs rename to src/Microsoft.ML.StaticPipe/Reconciler.cs diff --git a/src/Microsoft.ML.Data/StaticPipe/SchemaAssertionContext.cs b/src/Microsoft.ML.StaticPipe/SchemaAssertionContext.cs similarity index 100% rename from src/Microsoft.ML.Data/StaticPipe/SchemaAssertionContext.cs rename to src/Microsoft.ML.StaticPipe/SchemaAssertionContext.cs diff --git a/src/Microsoft.ML.Data/StaticPipe/SchemaBearing.cs b/src/Microsoft.ML.StaticPipe/SchemaBearing.cs similarity index 100% rename from src/Microsoft.ML.Data/StaticPipe/SchemaBearing.cs rename to src/Microsoft.ML.StaticPipe/SchemaBearing.cs diff --git a/src/Microsoft.ML.StandardLearners/Standard/SgdStatic.cs b/src/Microsoft.ML.StaticPipe/SgdStatic.cs similarity index 98% rename from src/Microsoft.ML.StandardLearners/Standard/SgdStatic.cs rename to src/Microsoft.ML.StaticPipe/SgdStatic.cs index a49ab88106..5e5bd98d08 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/SgdStatic.cs +++ b/src/Microsoft.ML.StaticPipe/SgdStatic.cs @@ -14,7 +14,7 @@ namespace Microsoft.ML.StaticPipe /// /// Binary Classification trainer estimators. /// - public static class SgdExtensions + public static class SgdStaticExtensions { /// /// Predict a target using a linear binary classification model trained with the trainer. 
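For readers tracking the GPU work, the following is a minimal sketch (not part of this diff) of how the new `OnnxScoringEstimator` constructor from the OnnxTransform/OnnxUtils hunks at the top of this section could be called. Only the constructor signature comes from the change; the model path, column names, and the `using` directives are illustrative assumptions.

```csharp
using Microsoft.ML;            // 0.9-era namespace for IHostEnvironment (approximate)
using Microsoft.ML.Transforms; // assumed namespace of OnnxScoringEstimator

public static class OnnxGpuScoringSketch
{
    // Builds the estimator against CUDA device 0. With fallbackToCpu: true, a GPU
    // initialization failure makes OnnxModel quietly fall back to a CPU InferenceSession
    // instead of rethrowing the OnnxRuntimeException.
    public static OnnxScoringEstimator Create(IHostEnvironment env)
    {
        return new OnnxScoringEstimator(env, "model.onnx",
            inputColumns: new[] { "data_0" },        // hypothetical model input name
            outputColumns: new[] { "softmaxout_1" }, // hypothetical model output name
            gpuDeviceId: 0,
            fallbackToCpu: true);
    }
}
```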
diff --git a/src/Microsoft.ML.Data/StaticPipe/StaticPipeExtensions.cs b/src/Microsoft.ML.StaticPipe/StaticPipeExtensions.cs similarity index 100% rename from src/Microsoft.ML.Data/StaticPipe/StaticPipeExtensions.cs rename to src/Microsoft.ML.StaticPipe/StaticPipeExtensions.cs diff --git a/src/Microsoft.ML.Data/StaticPipe/StaticPipeInternalUtils.cs b/src/Microsoft.ML.StaticPipe/StaticPipeInternalUtils.cs similarity index 100% rename from src/Microsoft.ML.Data/StaticPipe/StaticPipeInternalUtils.cs rename to src/Microsoft.ML.StaticPipe/StaticPipeInternalUtils.cs diff --git a/src/Microsoft.ML.Data/StaticPipe/StaticPipeUtils.cs b/src/Microsoft.ML.StaticPipe/StaticPipeUtils.cs similarity index 100% rename from src/Microsoft.ML.Data/StaticPipe/StaticPipeUtils.cs rename to src/Microsoft.ML.StaticPipe/StaticPipeUtils.cs diff --git a/src/Microsoft.ML.Data/StaticPipe/StaticSchemaShape.cs b/src/Microsoft.ML.StaticPipe/StaticSchemaShape.cs similarity index 100% rename from src/Microsoft.ML.Data/StaticPipe/StaticSchemaShape.cs rename to src/Microsoft.ML.StaticPipe/StaticSchemaShape.cs diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderStatic.cs b/src/Microsoft.ML.StaticPipe/TextLoaderStatic.cs similarity index 91% rename from src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderStatic.cs rename to src/Microsoft.ML.StaticPipe/TextLoaderStatic.cs index 708cd30647..403db281c2 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderStatic.cs +++ b/src/Microsoft.ML.StaticPipe/TextLoaderStatic.cs @@ -5,12 +5,12 @@ using System; using System.Collections.Generic; using Microsoft.ML.Core.Data; -using Microsoft.ML.StaticPipe; +using Microsoft.ML.Data; using Microsoft.ML.StaticPipe.Runtime; -namespace Microsoft.ML.Data +namespace Microsoft.ML.StaticPipe { - public sealed partial class TextLoader + public static class TextLoaderStatic { /// /// Configures a reader for text files. @@ -46,7 +46,7 @@ public static DataReader CreateReader<[IsShape] TSha env.CheckValueOrNull(files); // Populate all args except the columns. - var args = new Arguments(); + var args = new TextLoader.Arguments(); args.AllowQuoting = allowQuoting; args.AllowSparse = allowSparse; args.HasHeader = hasHeader; @@ -66,10 +66,10 @@ public static DataReader CreateReader<[IsShape] TSha private sealed class TextReconciler : ReaderReconciler { - private readonly Arguments _args; + private readonly TextLoader.Arguments _args; private readonly IMultiStreamSource _files; - public TextReconciler(Arguments args, IMultiStreamSource files) + public TextReconciler(TextLoader.Arguments args, IMultiStreamSource files) { Contracts.AssertValue(args); Contracts.AssertValueOrNull(files); @@ -86,7 +86,7 @@ public override IDataReaderEstimator - /// Creates a object corresponding to the , with everything - /// filled in except . + /// Creates a object corresponding to the , with everything + /// filled in except . /// - Column Create(); + TextLoader.Column Create(); } /// /// Context object by which a user can indicate what fields they want to read from a text file, and what data type they ought to have. /// Instances of this class are never made but the user, but rather are fed into the delegate in - /// . + /// . /// public sealed class Context { @@ -252,12 +252,12 @@ public MyKey(Reconciler rec, DataKind kind, int oridinal, ulong minKeyValue, ulo } // Translate the internal variable representation to columns of TextLoader. 
- public Column Create() + public TextLoader.Column Create() { - return new Column() + return new TextLoader.Column() { Type = _kind, - Source = new[] { new Range(_oridinal) }, + Source = new[] { new TextLoader.Range(_oridinal) }, KeyRange = new KeyRange(_minKeyValue, _maxKeyValue) }; } @@ -275,12 +275,12 @@ public MyScalar(Reconciler rec, DataKind kind, int ordinal) _ordinal = ordinal; } - public Column Create() + public TextLoader.Column Create() { - return new Column() + return new TextLoader.Column() { Type = _kind, - Source = new[] { new Range(_ordinal) }, + Source = new[] { new TextLoader.Range(_ordinal) }, }; } } @@ -299,12 +299,12 @@ public MyVector(Reconciler rec, DataKind kind, int min, int? max) _max = max; } - public Column Create() + public TextLoader.Column Create() { - return new Column() + return new TextLoader.Column() { Type = _kind, - Source = new[] { new Range(_min, _max) }, + Source = new[] { new TextLoader.Range(_min, _max) }, }; } } diff --git a/src/Microsoft.ML.Data/StaticPipe/TrainerEstimatorReconciler.cs b/src/Microsoft.ML.StaticPipe/TrainerEstimatorReconciler.cs similarity index 100% rename from src/Microsoft.ML.Data/StaticPipe/TrainerEstimatorReconciler.cs rename to src/Microsoft.ML.StaticPipe/TrainerEstimatorReconciler.cs diff --git a/src/Microsoft.ML.Data/Training/TrainingStaticExtensions.cs b/src/Microsoft.ML.StaticPipe/TrainingStaticExtensions.cs similarity index 99% rename from src/Microsoft.ML.Data/Training/TrainingStaticExtensions.cs rename to src/Microsoft.ML.StaticPipe/TrainingStaticExtensions.cs index b950f87180..30f5a300a6 100644 --- a/src/Microsoft.ML.Data/Training/TrainingStaticExtensions.cs +++ b/src/Microsoft.ML.StaticPipe/TrainingStaticExtensions.cs @@ -6,10 +6,9 @@ using System.Linq; using Microsoft.ML.Core.Data; using Microsoft.ML.Data; -using Microsoft.ML.StaticPipe; using Microsoft.ML.StaticPipe.Runtime; -namespace Microsoft.ML +namespace Microsoft.ML.StaticPipe { /// /// Defines static extension methods that allow operations like train-test split, cross-validate, diff --git a/src/Microsoft.ML.Data/StaticPipe/Transformer.cs b/src/Microsoft.ML.StaticPipe/Transformer.cs similarity index 100% rename from src/Microsoft.ML.Data/StaticPipe/Transformer.cs rename to src/Microsoft.ML.StaticPipe/Transformer.cs diff --git a/src/Microsoft.ML.StaticPipe/TransformsStatic.cs b/src/Microsoft.ML.StaticPipe/TransformsStatic.cs index 6a219a4362..9cd4091fda 100644 --- a/src/Microsoft.ML.StaticPipe/TransformsStatic.cs +++ b/src/Microsoft.ML.StaticPipe/TransformsStatic.cs @@ -14,6 +14,7 @@ using Microsoft.ML.Transforms.FeatureSelection; using Microsoft.ML.Transforms.Projections; using Microsoft.ML.Transforms.Text; +using static Microsoft.ML.Transforms.Text.TextFeaturizingEstimator; namespace Microsoft.ML.StaticPipe { @@ -1502,6 +1503,39 @@ public static VarVector IsMissingValue(this VarVector input) /// public static class TextFeaturizerStaticExtensions { + internal sealed class OutPipelineColumn : Vector + { + public readonly Scalar[] Inputs; + + public OutPipelineColumn(IEnumerable> inputs, Action advancedSettings) + : base(new Reconciler(advancedSettings), inputs.ToArray()) + { + Inputs = inputs.ToArray(); + } + } + + private sealed class Reconciler : EstimatorReconciler + { + private readonly Action _settings; + + public Reconciler(Action advancedSettings) + { + _settings = advancedSettings; + } + + public override IEstimator Reconcile(IHostEnvironment env, + PipelineColumn[] toOutput, + IReadOnlyDictionary inputNames, + IReadOnlyDictionary outputNames, + 
IReadOnlyCollection usedNames) + { + Contracts.Assert(toOutput.Length == 1); + + var outCol = (OutPipelineColumn)toOutput[0]; + var inputs = outCol.Inputs.Select(x => inputNames[x]); + return new TextFeaturizingEstimator(env, inputs, outputNames[outCol], _settings); + } + } /// /// Accept text data and converts it to array which represent combinations of ngram/skip-gram token counts. /// @@ -1514,7 +1548,137 @@ public static Vector FeaturizeText(this Scalar input, Scalar[0]; - return new TextFeaturizingEstimator.OutPipelineColumn(new[] { input }.Concat(otherInputs), advancedSettings); + return new OutPipelineColumn(new[] { input }.Concat(otherInputs), advancedSettings); } } + + public static class RffStaticExtenensions + { + private readonly struct Config + { + public readonly int NewDim; + public readonly bool UseSin; + public readonly int? Seed; + public readonly IComponentFactory Generator; + + public Config(int newDim, bool useSin, IComponentFactory generator, int? seed = null) + { + NewDim = newDim; + UseSin = useSin; + Generator = generator; + Seed = seed; + } + } + private interface IColInput + { + PipelineColumn Input { get; } + Config Config { get; } + } + + private sealed class ImplVector : Vector, IColInput + { + public PipelineColumn Input { get; } + public Config Config { get; } + public ImplVector(PipelineColumn input, Config config) : base(Reconciler.Inst, input) + { + Input = input; + Config = config; + } + } + + private sealed class Reconciler : EstimatorReconciler + { + public static readonly Reconciler Inst = new Reconciler(); + + public override IEstimator Reconcile(IHostEnvironment env, PipelineColumn[] toOutput, + IReadOnlyDictionary inputNames, IReadOnlyDictionary outputNames, IReadOnlyCollection usedNames) + { + var infos = new RandomFourierFeaturizingTransformer.ColumnInfo[toOutput.Length]; + for (int i = 0; i < toOutput.Length; ++i) + { + var tcol = (IColInput)toOutput[i]; + infos[i] = new RandomFourierFeaturizingTransformer.ColumnInfo(inputNames[tcol.Input], outputNames[toOutput[i]], tcol.Config.NewDim, tcol.Config.UseSin, tcol.Config.Generator, tcol.Config.Seed); + } + return new RandomFourierFeaturizingEstimator(env, infos); + } + } + + /// + /// It maps input to a random low-dimensional feature space. It is useful when data has non-linear features, since the transform + /// is designed so that the inner products of the transformed data are approximately equal to those in the feature space of a user + /// specified shift-invariant kernel. With this transform, we are able to use linear methods (which are scalable) to approximate more complex kernel SVM models. + /// + /// The column to apply the Random Fourier transformation to. + /// Expected size of new vector. + /// Create two features for every random Fourier frequency? (one for cos and one for sin) + /// Which kernel to use. ( by default) + /// The seed of the random number generator for generating the new features. If not specified, the global random state would be used. + public static Vector LowerVectorSizeWithRandomFourierTransformation(this Vector input, + int newDim = RandomFourierFeaturizingEstimator.Defaults.NewDim, bool useSin = RandomFourierFeaturizingEstimator.Defaults.UseSin, + IComponentFactory generator = null, int?
seed = null) + { + Contracts.CheckValue(input, nameof(input)); + return new ImplVector(input, new Config(newDim, useSin, generator, seed)); + } + } + + public static class PcaEstimatorExtensions + { + private sealed class OutPipelineColumn : Vector + { + public readonly Vector Input; + + public OutPipelineColumn(Vector input, string weightColumn, int rank, + int overSampling, bool center, int? seed = null) + : base(new Reconciler(weightColumn, rank, overSampling, center, seed), input) + { + Input = input; + } + } + + private sealed class Reconciler : EstimatorReconciler + { + private readonly PcaTransform.ColumnInfo _colInfo; + + public Reconciler(string weightColumn, int rank, int overSampling, bool center, int? seed = null) + { + _colInfo = new PcaTransform.ColumnInfo( + null, null, weightColumn, rank, overSampling, center, seed); + } + + public override IEstimator Reconcile(IHostEnvironment env, + PipelineColumn[] toOutput, + IReadOnlyDictionary inputNames, + IReadOnlyDictionary outputNames, + IReadOnlyCollection usedNames) + { + Contracts.Assert(toOutput.Length == 1); + var outCol = (OutPipelineColumn)toOutput[0]; + var inputColName = inputNames[outCol.Input]; + var outputColName = outputNames[outCol]; + return new PrincipalComponentAnalysisEstimator(env, inputColName, outputColName, + _colInfo.WeightColumn, _colInfo.Rank, _colInfo.Oversampling, + _colInfo.Center, _colInfo.Seed); + } + } + + /// + /// Replaces the input vector with its projection to the principal component subspace, + /// which can significantly reduce size of vector. + /// + /// + /// The column to apply PCA to. + /// The name of the weight column. + /// The number of components in the PCA. + /// Oversampling parameter for randomized PCA training. + /// If enabled, data is centered to be zero mean. + /// The seed for random number generation + /// Vector containing the principal components. + public static Vector ToPrincipalComponents(this Vector input, + string weightColumn = PrincipalComponentAnalysisEstimator.Defaults.WeightColumn, + int rank = PrincipalComponentAnalysisEstimator.Defaults.Rank, + int overSampling = PrincipalComponentAnalysisEstimator.Defaults.Oversampling, + bool center = PrincipalComponentAnalysisEstimator.Defaults.Center, + int? 
seed = null) => new OutPipelineColumn(input, weightColumn, rank, overSampling, center, seed); + } } diff --git a/src/Microsoft.ML.FastTree/TreeTrainersStatic.cs b/src/Microsoft.ML.StaticPipe/TreeTrainersStatic.cs similarity index 100% rename from src/Microsoft.ML.FastTree/TreeTrainersStatic.cs rename to src/Microsoft.ML.StaticPipe/TreeTrainersStatic.cs diff --git a/src/Microsoft.ML.Sweeper/Properties/AssemblyInfo.cs b/src/Microsoft.ML.Sweeper/Properties/AssemblyInfo.cs index 160c67eb55..3bf539c0d5 100644 --- a/src/Microsoft.ML.Sweeper/Properties/AssemblyInfo.cs +++ b/src/Microsoft.ML.Sweeper/Properties/AssemblyInfo.cs @@ -5,5 +5,4 @@ using System.Runtime.CompilerServices; using Microsoft.ML; -[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Legacy" + PublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.PipelineInference" + PublicKey.Value)] diff --git a/src/Microsoft.ML.TensorFlow.StaticPipe/Microsoft.ML.TensorFlow.StaticPipe.csproj b/src/Microsoft.ML.TensorFlow.StaticPipe/Microsoft.ML.TensorFlow.StaticPipe.csproj index e1de22bc59..76f95affa4 100644 --- a/src/Microsoft.ML.TensorFlow.StaticPipe/Microsoft.ML.TensorFlow.StaticPipe.csproj +++ b/src/Microsoft.ML.TensorFlow.StaticPipe/Microsoft.ML.TensorFlow.StaticPipe.csproj @@ -6,6 +6,7 @@ + diff --git a/src/Microsoft.ML.TensorFlow.StaticPipe/TensorFlowStaticExtensions.cs b/src/Microsoft.ML.TensorFlow.StaticPipe/TensorFlowStaticExtensions.cs index a80e7802ff..a2d9b46b40 100644 --- a/src/Microsoft.ML.TensorFlow.StaticPipe/TensorFlowStaticExtensions.cs +++ b/src/Microsoft.ML.TensorFlow.StaticPipe/TensorFlowStaticExtensions.cs @@ -1,4 +1,8 @@ -using System.Collections.Generic; +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.Collections.Generic; using Microsoft.ML.Core.Data; using Microsoft.ML.StaticPipe; using Microsoft.ML.StaticPipe.Runtime; diff --git a/src/Microsoft.ML.TensorFlow/TensorFlow/TensorflowUtils.cs b/src/Microsoft.ML.TensorFlow/TensorFlow/TensorflowUtils.cs index 341790d15b..49a9ec638a 100644 --- a/src/Microsoft.ML.TensorFlow/TensorFlow/TensorflowUtils.cs +++ b/src/Microsoft.ML.TensorFlow/TensorFlow/TensorflowUtils.cs @@ -103,14 +103,14 @@ public static Schema GetModelSchema(IExceptionContext ectx, string modelFile) var type = schema[i].Type; var metadataType = schema[i].Metadata.Schema.GetColumnOrNull(TensorFlowUtils.OpType)?.Type; - Contracts.Assert(metadataType != null && metadataType.IsText); + Contracts.Assert(metadataType != null && metadataType is TextType); ReadOnlyMemory opType = default; schema[i].Metadata.GetValue(TensorFlowUtils.OpType, ref opType); metadataType = schema[i].Metadata.Schema.GetColumnOrNull(TensorFlowUtils.InputOps)?.Type; VBuffer> inputOps = default; if (metadataType != null) { - Contracts.Assert(metadataType.IsKnownSizeVector && metadataType.ItemType.IsText); + Contracts.Assert(metadataType.IsKnownSizeVector && metadataType.ItemType is TextType); schema[i].Metadata.GetValue(TensorFlowUtils.InputOps, ref inputOps); } diff --git a/src/Microsoft.ML.Transforms/CountFeatureSelection.cs b/src/Microsoft.ML.Transforms/CountFeatureSelection.cs index b56253ae53..fd97523ae4 100644 --- a/src/Microsoft.ML.Transforms/CountFeatureSelection.cs +++ b/src/Microsoft.ML.Transforms/CountFeatureSelection.cs @@ -283,7 +283,7 @@ public static long[][] Train(IHostEnvironment env, IDataView input, string[] col } public static bool IsValidColumnType(ColumnType type) - => type == NumberType.R4 || type == NumberType.R8 || type.IsText; + => type == NumberType.R4 || type == NumberType.R8 || type is TextType; private static CountAggregator GetOneAggregator(Row row, ColumnType colType, int colSrc) { @@ -325,7 +325,7 @@ private sealed class CountAggregator : CountAggregator, IColumnAggregator getter) { - Contracts.Assert(type.IsPrimitive); + Contracts.Assert(type is PrimitiveType); _count = new long[1]; _buffer = new VBuffer(1, new T[1]); var t = default(T); diff --git a/src/Microsoft.ML.Transforms/GroupTransform.cs b/src/Microsoft.ML.Transforms/GroupTransform.cs index 48c6529ea9..015b5d9c77 100644 --- a/src/Microsoft.ML.Transforms/GroupTransform.cs +++ b/src/Microsoft.ML.Transforms/GroupTransform.cs @@ -332,7 +332,7 @@ private int[] GetColumnIds(Schema schema, string[] names, Func); diff --git a/src/Microsoft.ML.Transforms/KeyToVectorMapping.cs b/src/Microsoft.ML.Transforms/KeyToVectorMapping.cs index 62fa259d66..c89de81e5a 100644 --- a/src/Microsoft.ML.Transforms/KeyToVectorMapping.cs +++ b/src/Microsoft.ML.Transforms/KeyToVectorMapping.cs @@ -254,7 +254,7 @@ private void AddMetadata(int iinfo, MetadataBuilder builder) int metaKeyValuesCol = 0; if (inputMetadata.Schema.TryGetColumnIndex(MetadataUtils.Kinds.KeyValues, out metaKeyValuesCol)) typeNames = inputMetadata.Schema[metaKeyValuesCol].Type; - if (typeNames == null || !typeNames.IsKnownSizeVector || !typeNames.ItemType.IsText || + if (typeNames == null || !typeNames.IsKnownSizeVector || !(typeNames.ItemType is TextType) || typeNames.VectorSize != _infos[iinfo].TypeSrc.ItemType.KeyCount) { typeNames = null; @@ -320,7 +320,7 @@ private void GetSlotNames(int iinfo, ref VBuffer> dst) ColumnType typeSlotSrc = null; if (inputMetadata != null) typeSlotSrc = inputMetadata.Schema.GetColumnOrNull(MetadataUtils.Kinds.SlotNames)?.Type; - if 
(typeSlotSrc != null && typeSlotSrc.VectorSize == typeSrc.VectorSize && typeSlotSrc.ItemType.IsText) + if (typeSlotSrc != null && typeSlotSrc.VectorSize == typeSrc.VectorSize && typeSlotSrc.ItemType is TextType) { inputMetadata.GetValue(MetadataUtils.Kinds.SlotNames, ref namesSlotSrc); Host.Check(namesSlotSrc.Length == typeSrc.VectorSize); @@ -478,12 +478,12 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) { if (!inputSchema.TryFindColumn(colInfo.Input, out var col)) throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", colInfo.Input); - if ((col.ItemType.ItemType.RawKind == default) || !(col.ItemType.IsVector || col.ItemType.IsPrimitive)) + if ((col.ItemType.ItemType.RawKind == default) || !(col.ItemType.IsVector || col.ItemType is PrimitiveType)) throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", colInfo.Input); var metadata = new List(); if (col.Metadata.TryFindColumn(MetadataUtils.Kinds.KeyValues, out var keyMeta)) - if (col.Kind != SchemaShape.Column.VectorKind.VariableVector && keyMeta.ItemType.IsText) + if (col.Kind != SchemaShape.Column.VectorKind.VariableVector && keyMeta.ItemType is TextType) metadata.Add(new SchemaShape.Column(MetadataUtils.Kinds.SlotNames, SchemaShape.Column.VectorKind.Vector, keyMeta.ItemType, false)); if (col.Kind == SchemaShape.Column.VectorKind.Scalar) metadata.Add(new SchemaShape.Column(MetadataUtils.Kinds.IsNormalized, SchemaShape.Column.VectorKind.Scalar, BoolType.Instance, false)); diff --git a/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs b/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs index 3d43776377..df268e845e 100644 --- a/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs +++ b/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs @@ -150,7 +150,7 @@ private VectorType[] GetTypesAndMetadata() if (!type.IsKnownSizeVector || (typeNames = Source.Schema[Infos[iinfo].Source].Metadata.Schema.GetColumnOrNull(MetadataUtils.Kinds.SlotNames)?.Type) == null || typeNames.VectorSize != type.VectorSize || - !typeNames.ItemType.IsText) + !(typeNames.ItemType is TextType)) { continue; } @@ -198,7 +198,7 @@ private void GetSlotNames(int iinfo, ref VBuffer> dst) // REVIEW: Do we need to verify that there is metadata or should we just call GetMetadata? 
var typeNames = Source.Schema[Infos[iinfo].Source].Metadata.Schema.GetColumnOrNull(MetadataUtils.Kinds.SlotNames)?.Type; - if (typeNames == null || typeNames.VectorSize != type.VectorSize || !typeNames.ItemType.IsText) + if (typeNames == null || typeNames.VectorSize != type.VectorSize || !(typeNames.ItemType is TextType)) throw MetadataUtils.ExceptGetMetadata(); var names = default(VBuffer>); diff --git a/src/Microsoft.ML.Transforms/MissingValueReplacing.cs b/src/Microsoft.ML.Transforms/MissingValueReplacing.cs index 43e18a9126..65ed5e4ef8 100644 --- a/src/Microsoft.ML.Transforms/MissingValueReplacing.cs +++ b/src/Microsoft.ML.Transforms/MissingValueReplacing.cs @@ -341,7 +341,7 @@ private void GetReplacementValues(IDataView input, ColumnInfo[] columns, out obj case ReplacementKind.Mean: case ReplacementKind.Minimum: case ReplacementKind.Maximum: - if (!type.ItemType.IsNumber) + if (!(type.ItemType is NumberType)) throw Host.Except("Cannot perform mean imputations on non-numeric '{0}'", type.ItemType); imputationModes[iinfo] = kind; Utils.Add(ref columnsToImpute, iinfo); diff --git a/src/Microsoft.ML.Transforms/MissingValueReplacingUtils.cs b/src/Microsoft.ML.Transforms/MissingValueReplacingUtils.cs index fb5f6eaea4..21a4976041 100644 --- a/src/Microsoft.ML.Transforms/MissingValueReplacingUtils.cs +++ b/src/Microsoft.ML.Transforms/MissingValueReplacingUtils.cs @@ -15,7 +15,7 @@ public sealed partial class MissingValueReplacingTransformer { private static StatAggregator CreateStatAggregator(IChannel ch, ColumnType type, ReplacementKind? kind, bool bySlot, RowCursor cursor, int col) { - ch.Assert(type.ItemType.IsNumber); + ch.Assert(type.ItemType is NumberType); if (!type.IsVector) { // The type is a scalar. diff --git a/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs b/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs index ec8cdb1709..2b014b1c8b 100644 --- a/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs +++ b/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs @@ -335,7 +335,7 @@ internal static bool IsValidColumnType(ColumnType type) { // REVIEW: Consider supporting all integer and unsigned types. 
return - (0 < type.KeyCount && type.KeyCount < Utils.ArrayMaxSize) || type.IsBool || + (0 < type.KeyCount && type.KeyCount < Utils.ArrayMaxSize) || type is BoolType || type == NumberType.R4 || type == NumberType.R8 || type == NumberType.I4; } @@ -468,7 +468,7 @@ private void GetLabels(Transposer trans, ColumnType labelType, int labelCol) BinDoubles(in tmp, ref labels, _numBins, out min, out lim); _numLabels = lim - min; } - else if (labelType.IsBool) + else if (labelType is BoolType) { var tmp = default(VBuffer); trans.GetSingleSlotValue(labelCol, ref tmp); @@ -545,7 +545,7 @@ private Single[] ComputeMutualInformation(Transposer trans, int col) BinDoubles(in src, ref dst, _numBins, out min, out lim); }); } - if (type.ItemType.IsBool) + if (type.ItemType is BoolType) { return ComputeMutualInformation(trans, col, (ref VBuffer src, ref VBuffer dst, out int min, out int lim) => diff --git a/src/Microsoft.ML.Transforms/Properties/AssemblyInfo.cs b/src/Microsoft.ML.Transforms/Properties/AssemblyInfo.cs index 443f2304db..5d1f1e64b7 100644 --- a/src/Microsoft.ML.Transforms/Properties/AssemblyInfo.cs +++ b/src/Microsoft.ML.Transforms/Properties/AssemblyInfo.cs @@ -7,4 +7,10 @@ [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.StaticPipe" + PublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "RunTests" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.NeuralNetworks" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.RServerScoring.NeuralNetworks" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.TextAnalytics" + InternalPublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.RServerScoring.TextAnalytics" + InternalPublicKey.Value)] + [assembly: WantsToBeBestFriends] diff --git a/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs b/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs index 5b65dda65a..66ff4c3f21 100644 --- a/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs +++ b/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs @@ -3,7 +3,6 @@ // See the LICENSE file in the project root for more information. using System; -using System.Collections.Generic; using System.Linq; using System.Text; using Microsoft.ML; @@ -14,8 +13,6 @@ using Microsoft.ML.Internal.Utilities; using Microsoft.ML.Model; using Microsoft.ML.Numeric; -using Microsoft.ML.StaticPipe; -using Microsoft.ML.StaticPipe.Runtime; using Microsoft.ML.Transforms.Projections; [assembly: LoadableClass(RandomFourierFeaturizingTransformer.Summary, typeof(IDataTransform), typeof(RandomFourierFeaturizingTransformer), typeof(RandomFourierFeaturizingTransformer.Arguments), typeof(SignatureDataTransform), @@ -643,6 +640,7 @@ private void TransformFeatures(in VBuffer src, ref VBuffer dst, Tr /// public sealed class RandomFourierFeaturizingEstimator : IEstimator { + [BestFriend] internal static class Defaults { public const int NewDim = 1000; @@ -691,74 +689,4 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema) return new SchemaShape(result.Values); } } - - public static class RffExtenensions - { - private readonly struct Config - { - public readonly int NewDim; - public readonly bool UseSin; - public readonly int? Seed; - public readonly IComponentFactory Generator; - - public Config(int newDim, bool useSin, IComponentFactory generator, int? 
seed = null) - { - NewDim = newDim; - UseSin = useSin; - Generator = generator; - Seed = seed; - } - } - private interface IColInput - { - PipelineColumn Input { get; } - Config Config { get; } - } - - private sealed class ImplVector : Vector, IColInput - { - public PipelineColumn Input { get; } - public Config Config { get; } - public ImplVector(PipelineColumn input, Config config) : base(Reconciler.Inst, input) - { - Input = input; - Config = config; - } - } - - private sealed class Reconciler : EstimatorReconciler - { - public static readonly Reconciler Inst = new Reconciler(); - - public override IEstimator Reconcile(IHostEnvironment env, PipelineColumn[] toOutput, - IReadOnlyDictionary inputNames, IReadOnlyDictionary outputNames, IReadOnlyCollection usedNames) - { - var infos = new RandomFourierFeaturizingTransformer.ColumnInfo[toOutput.Length]; - for (int i = 0; i < toOutput.Length; ++i) - { - var tcol = (IColInput)toOutput[i]; - infos[i] = new RandomFourierFeaturizingTransformer.ColumnInfo(inputNames[tcol.Input], outputNames[toOutput[i]], tcol.Config.NewDim, tcol.Config.UseSin, tcol.Config.Generator, tcol.Config.Seed); - } - return new RandomFourierFeaturizingEstimator(env, infos); - } - } - - /// - /// It maps input to a random low-dimensional feature space. It is useful when data has non-linear features, since the transform - /// is designed so that the inner products of the transformed data are approximately equal to those in the feature space of a user - /// specified shift-invariant kernel. With this transform, we are able to use linear methods (which are scalable) to approximate more complex kernel SVM models. - /// - /// The column to apply Random Fourier transfomration. - /// Expected size of new vector. - /// Create two features for every random Fourier frequency? (one for cos and one for sin) - /// Which kernel to use. ( by default) - /// The seed of the random number generator for generating the new features. If not specified global random would be used. - public static Vector LowerVectorSizeWithRandomFourierTransformation(this Vector input, - int newDim = RandomFourierFeaturizingEstimator.Defaults.NewDim, bool useSin = RandomFourierFeaturizingEstimator.Defaults.UseSin, - IComponentFactory generator = null, int? 
seed = null) - { - Contracts.CheckValue(input, nameof(input)); - return new ImplVector(input, new Config(newDim, useSin, generator, seed)); - } - } } diff --git a/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs b/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs index adb667d0ae..75361ed1b9 100644 --- a/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs +++ b/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs @@ -540,7 +540,7 @@ internal static class Defaults public const Language DefaultLanguage = Language.English; } - public static bool IsColumnTypeValid(ColumnType type) => type.ItemType.IsText && type.IsVector; + public static bool IsColumnTypeValid(ColumnType type) => type.ItemType is TextType && type.IsVector; internal const string ExpectedColumnType = "vector of Text type"; @@ -582,7 +582,7 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) { if (!inputSchema.TryFindColumn(colInfo.Input, out var col)) throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", colInfo.Input); - if (col.Kind == SchemaShape.Column.VectorKind.Scalar || !col.ItemType.IsText) + if (col.Kind == SchemaShape.Column.VectorKind.Scalar || !(col.ItemType is TextType)) throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", colInfo.Input, ExpectedColumnType, col.ItemType.ToString()); result[colInfo.Output] = new SchemaShape.Column(colInfo.Output, SchemaShape.Column.VectorKind.VariableVector, TextType.Instance, false); } @@ -772,7 +772,7 @@ private void LoadStopWords(IChannel ch, ReadOnlyMemory stopwords, string d if (!loader.Schema.TryGetColumnIndex(srcCol, out colSrc)) throw ch.ExceptUserArg(nameof(Arguments.StopwordsColumn), "Unknown column '{0}'", srcCol); var typeSrc = loader.Schema[colSrc].Type; - ch.CheckUserArg(typeSrc.IsText, nameof(Arguments.StopwordsColumn), "Must be a scalar text column"); + ch.CheckUserArg(typeSrc is TextType, nameof(Arguments.StopwordsColumn), "Must be a scalar text column"); // Accumulate the stopwords. 
using (var cursor = loader.GetRowCursor(col => col == colSrc)) @@ -1072,7 +1072,7 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) { if (!inputSchema.TryFindColumn(colInfo.input, out var col)) throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", colInfo.input); - if (col.Kind == SchemaShape.Column.VectorKind.Scalar || !col.ItemType.IsText) + if (col.Kind == SchemaShape.Column.VectorKind.Scalar || !(col.ItemType is TextType)) throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", colInfo.input, ExpectedColumnType, col.ItemType.ToString()); result[colInfo.output] = new SchemaShape.Column(colInfo.output, SchemaShape.Column.VectorKind.VariableVector, TextType.Instance, false); } diff --git a/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs b/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs index 7a3d9cc97d..ed556a2b06 100644 --- a/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs +++ b/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs @@ -15,8 +15,6 @@ using Microsoft.ML.Internal.Internallearn; using Microsoft.ML.Internal.Utilities; using Microsoft.ML.Model; -using Microsoft.ML.StaticPipe; -using Microsoft.ML.StaticPipe.Runtime; using Microsoft.ML.Transforms.Projections; using Microsoft.ML.Transforms.Text; @@ -476,7 +474,7 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema) { if (!inputSchema.TryFindColumn(srcName, out var col)) throw _host.ExceptSchemaMismatch(nameof(inputSchema), "input", srcName); - if (!col.ItemType.IsText) + if (!(col.ItemType is TextType)) throw _host.ExceptSchemaMismatch(nameof(inputSchema), "input", srcName, "scalar or vector of text", col.GetTypeString()); } @@ -621,40 +619,5 @@ private static VersionInfo GetVersionInfo() loaderAssemblyName: typeof(Transformer).Assembly.FullName); } } - - [BestFriend] - internal sealed class OutPipelineColumn : Vector - { - public readonly Scalar[] Inputs; - - public OutPipelineColumn(IEnumerable> inputs, Action advancedSettings) - : base(new Reconciler(advancedSettings), inputs.ToArray()) - { - Inputs = inputs.ToArray(); - } - } - - private sealed class Reconciler : EstimatorReconciler - { - private readonly Action _settings; - - public Reconciler(Action advancedSettings) - { - _settings = advancedSettings; - } - - public override IEstimator Reconcile(IHostEnvironment env, - PipelineColumn[] toOutput, - IReadOnlyDictionary inputNames, - IReadOnlyDictionary outputNames, - IReadOnlyCollection usedNames) - { - Contracts.Assert(toOutput.Length == 1); - - var outCol = (OutPipelineColumn)toOutput[0]; - var inputs = outCol.Inputs.Select(x => inputNames[x]); - return new TextFeaturizingEstimator(env, inputs, outputNames[outCol], _settings); - } - } } } diff --git a/src/Microsoft.ML.Transforms/Text/TextNormalizing.cs b/src/Microsoft.ML.Transforms/Text/TextNormalizing.cs index ec7b809af8..9246ba196d 100644 --- a/src/Microsoft.ML.Transforms/Text/TextNormalizing.cs +++ b/src/Microsoft.ML.Transforms/Text/TextNormalizing.cs @@ -286,7 +286,7 @@ protected override Delegate MakeGetter(Row input, int iinfo, Func act disposer = null; var srcType = input.Schema[_parent.ColumnPairs[iinfo].input].Type; - Host.Assert(srcType.ItemType.IsText); + Host.Assert(srcType.ItemType is TextType); if (srcType.IsVector) { @@ -450,7 +450,7 @@ internal static class Defaults } - public static bool IsColumnTypeValid(ColumnType type) => (type.ItemType.IsText); + public static bool IsColumnTypeValid(ColumnType type) => (type.ItemType is TextType); internal const string 
ExpectedColumnType = "Text or vector of text."; diff --git a/src/Microsoft.ML.Transforms/Text/TokenizingByCharacters.cs b/src/Microsoft.ML.Transforms/Text/TokenizingByCharacters.cs index 44474cd83c..ea89edd19c 100644 --- a/src/Microsoft.ML.Transforms/Text/TokenizingByCharacters.cs +++ b/src/Microsoft.ML.Transforms/Text/TokenizingByCharacters.cs @@ -554,7 +554,7 @@ internal static class Defaults { public const bool UseMarkerCharacters = true; } - public static bool IsColumnTypeValid(ColumnType type) => type.ItemType.IsText; + public static bool IsColumnTypeValid(ColumnType type) => type.ItemType is TextType; internal const string ExpectedColumnType = "Text"; diff --git a/src/Microsoft.ML.Transforms/Text/WordBagTransform.cs b/src/Microsoft.ML.Transforms/Text/WordBagTransform.cs index 5ed2440b3c..90db6a0503 100644 --- a/src/Microsoft.ML.Transforms/Text/WordBagTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/WordBagTransform.cs @@ -285,7 +285,7 @@ public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataV h.CheckNonWhiteSpace(col.Source, nameof(col.Source)); int colId; if (input.Schema.TryGetColumnIndex(col.Source, out colId) && - input.Schema[colId].Type.ItemType.IsText) + input.Schema[colId].Type.ItemType is TextType) { termCols.Add(col); isTermCol[i] = true; diff --git a/src/Microsoft.ML.Transforms/Text/WordEmbeddingsExtractor.cs b/src/Microsoft.ML.Transforms/Text/WordEmbeddingsExtractor.cs index 13ec4b694d..b129f4e99e 100644 --- a/src/Microsoft.ML.Transforms/Text/WordEmbeddingsExtractor.cs +++ b/src/Microsoft.ML.Transforms/Text/WordEmbeddingsExtractor.cs @@ -320,7 +320,7 @@ public override void Save(ModelSaveContext ctx) protected override void CheckInputColumn(Schema inputSchema, int col, int srcCol) { var colType = inputSchema[srcCol].Type; - if (!(colType.IsVector && colType.ItemType.IsText)) + if (!(colType.IsVector && colType.ItemType is TextType)) throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", ColumnPairs[col].input, "Text", inputSchema[srcCol].Type.ToString()); } @@ -571,7 +571,7 @@ private ValueGetter> GetGetterVec(Row input, int iinfo) var colType = input.Schema[ColMapNewToOld[iinfo]].Type; Host.Assert(colType.IsVector); - Host.Assert(colType.ItemType.IsText); + Host.Assert(colType.ItemType is TextType); var srcGetter = input.GetGetter>>(ColMapNewToOld[iinfo]); var src = default(VBuffer>); diff --git a/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs b/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs index a14fd70eba..53880db9d8 100644 --- a/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs +++ b/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs @@ -264,7 +264,7 @@ protected override Delegate MakeGetter(Row input, int iinfo, Func act input.Schema.TryGetColumnIndex(_parent._columns[iinfo].Input, out int srcCol); var srcType = input.Schema[srcCol].Type; - Host.Assert(srcType.ItemType.IsText); + Host.Assert(srcType.ItemType is TextType); if (!srcType.IsVector) return MakeGetterOne(input, iinfo); @@ -427,7 +427,7 @@ private JToken SaveAsPfaCore(BoundPfaContext ctx, int iinfo, JToken srcToken) /// public sealed class WordTokenizingEstimator : TrivialEstimator { - public static bool IsColumnTypeValid(ColumnType type) => type.ItemType.IsText; + public static bool IsColumnTypeValid(ColumnType type) => type.ItemType is TextType; internal const string ExpectedColumnType = "Text"; diff --git a/src/Microsoft.ML.Transforms/UngroupTransform.cs b/src/Microsoft.ML.Transforms/UngroupTransform.cs index 51d1c4a569..67c36a9aa1 100644 --- 
a/src/Microsoft.ML.Transforms/UngroupTransform.cs +++ b/src/Microsoft.ML.Transforms/UngroupTransform.cs @@ -286,7 +286,7 @@ private static void CheckAndBind(IExceptionContext ectx, Schema inputSchema, if (!inputSchema.TryGetColumnIndex(name, out col)) throw ectx.ExceptUserArg(nameof(Arguments.Column), "Pivot column '{0}' is not found", name); var colType = inputSchema[col].Type; - if (!colType.IsVector || !colType.ItemType.IsPrimitive) + if (!colType.IsVector || !(colType.ItemType is PrimitiveType)) throw ectx.ExceptUserArg(nameof(Arguments.Column), "Pivot column '{0}' has type '{1}', but must be a vector of primitive types", name, colType); infos[i] = new PivotColumnInfo(name, col, colType.VectorSize, (PrimitiveType)colType.ItemType); diff --git a/test/BaselineOutput/Common/Command/codegen-out.cs b/test/BaselineOutput/Common/Command/codegen-out.cs index 12857cc945..786042c403 100644 --- a/test/BaselineOutput/Common/Command/codegen-out.cs +++ b/test/BaselineOutput/Common/Command/codegen-out.cs @@ -1,6 +1,5 @@ using System; using System.Collections.Generic; -using Microsoft.ML.Legacy; using Microsoft.ML.Api; namespace MLGeneratedCode diff --git a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv index 75f83c5030..7eb7ebd712 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv +++ b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv @@ -2,7 +2,7 @@ Data.CustomTextLoader Import a dataset from a text file Microsoft.ML.EntryPoints Data.DataViewReference Pass dataview from memory to experiment Microsoft.ML.EntryPoints.DataViewReference ImportData Microsoft.ML.EntryPoints.DataViewReference+Input Microsoft.ML.EntryPoints.DataViewReference+Output Data.IDataViewArrayConverter Create an array variable of IDataView Microsoft.ML.EntryPoints.MacroUtils MakeArray Microsoft.ML.EntryPoints.MacroUtils+ArrayIDataViewInput Microsoft.ML.EntryPoints.MacroUtils+ArrayIDataViewOutput Data.PredictorModelArrayConverter Create an array variable of PredictorModel Microsoft.ML.EntryPoints.MacroUtils MakeArray Microsoft.ML.EntryPoints.MacroUtils+ArrayIPredictorModelInput Microsoft.ML.EntryPoints.MacroUtils+ArrayIPredictorModelOutput -Data.TextLoader Import a dataset from a text file Microsoft.ML.Legacy.EntryPoints.ImportTextData TextLoader Microsoft.ML.Legacy.EntryPoints.ImportTextData+LoaderInput Microsoft.ML.EntryPoints.ImportTextData+Output +Data.TextLoader Import a dataset from a text file Microsoft.ML.EntryPoints.ImportTextData TextLoader Microsoft.ML.EntryPoints.ImportTextData+LoaderInput Microsoft.ML.EntryPoints.ImportTextData+Output Models.AnomalyDetectionEvaluator Evaluates an anomaly detection scored dataset. Microsoft.ML.Data.Evaluate AnomalyDetection Microsoft.ML.Data.AnomalyDetectionMamlEvaluator+Arguments Microsoft.ML.EntryPoints.CommonOutputs+CommonEvaluateOutput Models.AnomalyPipelineEnsemble Combine anomaly detection models into an ensemble Microsoft.ML.EntryPoints.EnsembleCreator CreateAnomalyPipelineEnsemble Microsoft.ML.EntryPoints.EnsembleCreator+PipelineAnomalyInput Microsoft.ML.EntryPoints.CommonOutputs+AnomalyDetectionOutput Models.BinaryClassificationEvaluator Evaluates a binary classification scored dataset. 
Microsoft.ML.Data.Evaluate Binary Microsoft.ML.Data.BinaryClassifierMamlEvaluator+Arguments Microsoft.ML.EntryPoints.CommonOutputs+ClassificationEvaluateOutput diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json index 3e95b0e09d..518b863f2f 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json +++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json @@ -464,9 +464,6 @@ "Type": "DataView", "Desc": "The resulting data view" } - ], - "InputKind": [ - "ILearningPipelineLoader" ] }, { @@ -29196,10 +29193,6 @@ } ] }, - { - "Kind": "ILearningPipelineLoader", - "Settings": [] - }, { "Kind": "IMulticlassClassificationOutput", "Settings": [] diff --git a/test/BaselineOutput/Common/Onnx/BinaryClassification/BreastCancer/BinaryClassificationLightGBMSaveModelToOnnxTest.json b/test/BaselineOutput/Common/Onnx/BinaryClassification/BreastCancer/BinaryClassificationLightGBMSaveModelToOnnxTest.json deleted file mode 100644 index 578322d150..0000000000 --- a/test/BaselineOutput/Common/Onnx/BinaryClassification/BreastCancer/BinaryClassificationLightGBMSaveModelToOnnxTest.json +++ /dev/null @@ -1,315 +0,0 @@ -{ - "irVersion": "3", - "producerName": "ML.NET", - "producerVersion": "##VERSION##", - "domain": "Onnx", - "graph": { - "node": [ - { - "input": [ - "Features" - ], - "output": [ - "Score" - ], - "name": "TreeEnsembleRegressor", - "opType": "TreeEnsembleRegressor", - "attribute": [ - { - "name": "post_transform", - "s": "Tk9ORQ==", - "type": "STRING" - }, - { - "name": "n_targets", - "i": "1", - "type": "INT" - }, - { - "name": "base_values", - "floats": [ - 0 - ], - "type": "FLOATS" - }, - { - "name": "aggregate_function", - "s": "U1VN", - "type": "STRING" - }, - { - "name": "nodes_treeids", - "ints": [ - "0", - "0", - "0" - ], - "type": "INTS" - }, - { - "name": "nodes_nodeids", - "ints": [ - "0", - "1", - "2" - ], - "type": "INTS" - }, - { - "name": "nodes_featureids", - "ints": [ - "1", - "0", - "0" - ], - "type": "INTS" - }, - { - "name": "nodes_modes", - "strings": [ - "QlJBTkNIX0xFUQ==", - "TEVBRg==", - "TEVBRg==" - ], - "type": "STRINGS" - }, - { - "name": "nodes_values", - "floats": [ - 2.5, - 0, - 0 - ], - "type": "FLOATS" - }, - { - "name": "nodes_truenodeids", - "ints": [ - "1", - "0", - "0" - ], - "type": "INTS" - }, - { - "name": "nodes_falsenodeids", - "ints": [ - "2", - "0", - "0" - ], - "type": "INTS" - }, - { - "name": "nodes_missing_value_tracks_true", - "ints": [ - "0", - "0", - "0" - ], - "type": "INTS" - }, - { - "name": "target_treeids", - "ints": [ - "0", - "0" - ], - "type": "INTS" - }, - { - "name": "target_nodeids", - "ints": [ - "1", - "2" - ], - "type": "INTS" - }, - { - "name": "target_ids", - "ints": [ - "0", - "0" - ], - "type": "INTS" - }, - { - "name": "target_weights", - "floats": [ - -1.799208, - -0.34535858 - ], - "type": "FLOATS" - } - ], - "domain": "ai.onnx.ml" - }, - { - "input": [ - "Score" - ], - "output": [ - "linearOutput" - ], - "name": "Affine", - "opType": "Affine", - "attribute": [ - { - "name": "alpha", - "f": 0.5, - "type": "FLOAT" - }, - { - "name": "beta", - "f": -1E-07, - "type": "FLOAT" - } - ] - }, - { - "input": [ - "linearOutput" - ], - "output": [ - "Probability" - ], - "name": "Sigmoid", - "opType": "Sigmoid" - }, - { - "input": [ - "Probability" - ], - "output": [ - "PredictedLabel" - ], - "name": "Binarizer", - "opType": "Binarizer", - "attribute": [ - { - "name": "threshold", - "f": 0.5, - "type": "FLOAT" - } - ], - "domain": "ai.onnx.ml" - 
}, - { - "input": [ - "PredictedLabel" - ], - "output": [ - "PredictedLabel0" - ], - "name": "Identity", - "opType": "Identity" - }, - { - "input": [ - "Score" - ], - "output": [ - "Score0" - ], - "name": "Identity0", - "opType": "Identity" - }, - { - "input": [ - "Probability" - ], - "output": [ - "Probability0" - ], - "name": "Identity1", - "opType": "Identity" - } - ], - "name": "BinaryClassificationLightGBMSaveModelToOnnxTest", - "input": [ - { - "name": "Features", - "type": { - "tensorType": { - "elemType": "FLOAT", - "shape": { - "dim": [ - { - "dimValue": "1" - }, - { - "dimValue": "9" - } - ] - } - } - } - } - ], - "output": [ - { - "name": "PredictedLabel0", - "type": { - "tensorType": { - "elemType": "FLOAT", - "shape": { - "dim": [ - { - "dimValue": "1" - }, - { - "dimValue": "1" - } - ] - } - } - } - }, - { - "name": "Score0", - "type": { - "tensorType": { - "elemType": "FLOAT", - "shape": { - "dim": [ - { - "dimValue": "1" - }, - { - "dimValue": "1" - } - ] - } - } - } - }, - { - "name": "Probability0", - "type": { - "tensorType": { - "elemType": "FLOAT", - "shape": { - "dim": [ - { - "dimValue": "1" - }, - { - "dimValue": "1" - } - ] - } - } - } - } - ] - }, - "opsetImport": [ - { - "domain": "ai.onnx.ml", - "version": "1" - }, - { - "version": "7" - } - ] -} \ No newline at end of file diff --git a/test/BaselineOutput/Common/Onnx/BinaryClassification/BreastCancer/BinaryClassificationFastTreeSaveModelToOnnxTest.json b/test/BaselineOutput/Common/Onnx/BinaryClassification/BreastCancer/ExcludeVariablesInOnnxConversion.txt similarity index 91% rename from test/BaselineOutput/Common/Onnx/BinaryClassification/BreastCancer/BinaryClassificationFastTreeSaveModelToOnnxTest.json rename to test/BaselineOutput/Common/Onnx/BinaryClassification/BreastCancer/ExcludeVariablesInOnnxConversion.txt index b032fc1aaf..0e2a01ba0d 100644 --- a/test/BaselineOutput/Common/Onnx/BinaryClassification/BreastCancer/BinaryClassificationFastTreeSaveModelToOnnxTest.json +++ b/test/BaselineOutput/Common/Onnx/BinaryClassification/BreastCancer/ExcludeVariablesInOnnxConversion.txt @@ -2,61 +2,9 @@ "irVersion": "3", "producerName": "ML.NET", "producerVersion": "##VERSION##", - "domain": "Onnx", + "domain": "machinelearning.dotnet", "graph": { "node": [ - { - "input": [ - "F1" - ], - "output": [ - "F10" - ], - "name": "Imputer", - "opType": "Imputer", - "attribute": [ - { - "name": "replaced_value_float", - "f": "NaN", - "type": "FLOAT" - }, - { - "name": "imputed_value_floats", - "floats": [ - 0 - ], - "type": "FLOATS" - } - ], - "domain": "ai.onnx.ml" - }, - { - "input": [ - "F10" - ], - "output": [ - "F11" - ], - "name": "Scaler", - "opType": "Scaler", - "attribute": [ - { - "name": "offset", - "floats": [ - 0 - ], - "type": "FLOATS" - }, - { - "name": "scale", - "floats": [ - 0.1 - ], - "type": "FLOATS" - } - ], - "domain": "ai.onnx.ml" - }, { "input": [ "F2" @@ -101,7 +49,7 @@ "F20" ], "output": [ - "F21" + "encoded" ], "name": "OneHotEncoder", "opType": "OneHotEncoder", @@ -132,9 +80,57 @@ }, { "input": [ - "F11", + "encoded" + ], + "output": [ + "F21" + ], + "name": "ReduceSum", + "opType": "ReduceSum", + "attribute": [ + { + "name": "axes", + "ints": [ + "1" + ], + "type": "INTS" + }, + { + "name": "keepdims", + "type": "INT" + } + ] + }, + { + "input": [ "F21" ], + "output": [ + "F22" + ], + "name": "Imputer", + "opType": "Imputer", + "attribute": [ + { + "name": "replaced_value_float", + "f": "NaN", + "type": "FLOAT" + }, + { + "name": "imputed_value_floats", + "floats": [ + 0 + ], + "type": "FLOATS" + } 
+ ], + "domain": "ai.onnx.ml" + }, + { + "input": [ + "F1", + "F22" + ], "output": [ "Features" ], @@ -156,6 +152,53 @@ "input": [ "Features" ], + "output": [ + "Features0" + ], + "name": "Scaler", + "opType": "Scaler", + "attribute": [ + { + "name": "offset", + "floats": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "type": "FLOATS" + }, + { + "name": "scale", + "floats": [ + 0.1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1 + ], + "type": "FLOATS" + } + ], + "domain": "ai.onnx.ml" + }, + { + "input": [ + "Features0" + ], "output": [ "Score" ], @@ -372,7 +415,7 @@ "opType": "Identity" } ], - "name": "BinaryClassificationFastTreeSaveModelToOnnxTest", + "name": "A Simple Pipeline", "input": [ { "name": "F1", @@ -469,10 +512,10 @@ ], "valueInfo": [ { - "name": "F10", + "name": "F20", "type": { "tensorType": { - "elemType": "FLOAT", + "elemType": "INT64", "shape": { "dim": [ { @@ -487,7 +530,7 @@ } }, { - "name": "F11", + "name": "F21", "type": { "tensorType": { "elemType": "FLOAT", @@ -497,7 +540,7 @@ "dimValue": "1" }, { - "dimValue": "1" + "dimValue": "10" } ] } @@ -505,17 +548,17 @@ } }, { - "name": "F20", + "name": "F22", "type": { "tensorType": { - "elemType": "INT64", + "elemType": "FLOAT", "shape": { "dim": [ { "dimValue": "1" }, { - "dimValue": "1" + "dimValue": "10" } ] } @@ -523,7 +566,7 @@ } }, { - "name": "F21", + "name": "Features", "type": { "tensorType": { "elemType": "FLOAT", @@ -533,7 +576,7 @@ "dimValue": "1" }, { - "dimValue": "10" + "dimValue": "11" } ] } @@ -541,7 +584,7 @@ } }, { - "name": "Features", + "name": "Features0", "type": { "tensorType": { "elemType": "FLOAT", diff --git a/test/BaselineOutput/Common/Onnx/BinaryClassification/BreastCancer/LightGbmBinaryClassificationOnnxConversionTest.txt b/test/BaselineOutput/Common/Onnx/BinaryClassification/BreastCancer/LightGbmBinaryClassificationOnnxConversionTest.txt new file mode 100644 index 0000000000..a6abf86b57 --- /dev/null +++ b/test/BaselineOutput/Common/Onnx/BinaryClassification/BreastCancer/LightGbmBinaryClassificationOnnxConversionTest.txt @@ -0,0 +1,532 @@ +{ + "irVersion": "3", + "producerName": "ML.NET", + "producerVersion": "##VERSION##", + "domain": "machinelearning.dotnet", + "graph": { + "node": [ + { + "input": [ + "FeatureVector" + ], + "output": [ + "FeatureVector0" + ], + "name": "Scaler", + "opType": "Scaler", + "attribute": [ + { + "name": "offset", + "floats": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "type": "FLOATS" + }, + { + "name": "scale", + "floats": [ + 0.2544529, + 0.3184713, + 0.259740263, + 0.324675322, + 0.3144654, + 0.332225919, + 0.3436426, + 0.321543425, + 0.30864197, + 0.3154574, + 0.344827563 + ], + "type": "FLOATS" + } + ], + "domain": "ai.onnx.ml" + }, + { + "input": [ + "FeatureVector0" + ], + "output": [ + "Score" + ], + "name": "TreeEnsembleRegressor", + "opType": "TreeEnsembleRegressor", + "attribute": [ + { + "name": "post_transform", + "s": "Tk9ORQ==", + "type": "STRING" + }, + { + "name": "n_targets", + "i": "1", + "type": "INT" + }, + { + "name": "base_values", + "floats": [ + 0 + ], + "type": "FLOATS" + }, + { + "name": "aggregate_function", + "s": "U1VN", + "type": "STRING" + }, + { + "name": "nodes_treeids", + "ints": [ + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "1", + "1", + "1", + "1", + "1", + "1", + "1", + "2", + "2", + "2", + "2", + "2" + ], + "type": "INTS" + }, + { + "name": "nodes_nodeids", + "ints": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "0", + "1", 
+ "2", + "3", + "4" + ], + "type": "INTS" + }, + { + "name": "nodes_featureids", + "ints": [ + "0", + "10", + "10", + "0", + "0", + "0", + "0", + "2", + "0", + "10", + "0", + "0", + "0", + "0", + "10", + "2", + "0", + "0", + "0" + ], + "type": "INTS" + }, + { + "name": "nodes_modes", + "strings": [ + "QlJBTkNIX0xFUQ==", + "QlJBTkNIX0xFUQ==", + "QlJBTkNIX0xFUQ==", + "TEVBRg==", + "TEVBRg==", + "TEVBRg==", + "TEVBRg==", + "QlJBTkNIX0xFUQ==", + "QlJBTkNIX0xFUQ==", + "QlJBTkNIX0xFUQ==", + "TEVBRg==", + "TEVBRg==", + "TEVBRg==", + "TEVBRg==", + "QlJBTkNIX0xFUQ==", + "QlJBTkNIX0xFUQ==", + "TEVBRg==", + "TEVBRg==", + "TEVBRg==" + ], + "type": "STRINGS" + }, + { + "name": "nodes_values", + "floats": [ + 0.0292620845, + -0.07068965, + 0.106896549, + 0, + 0, + 0, + 0, + 0.019480519, + -0.03689567, + 0.0741379261, + 0, + 0, + 0, + 0, + -0.09310344, + -0.035064932, + 0, + 0, + 0 + ], + "type": "FLOATS" + }, + { + "name": "nodes_truenodeids", + "ints": [ + "1", + "3", + "4", + "0", + "0", + "0", + "0", + "1", + "3", + "4", + "0", + "0", + "0", + "0", + "2", + "3", + "0", + "0", + "0" + ], + "type": "INTS" + }, + { + "name": "nodes_falsenodeids", + "ints": [ + "2", + "5", + "6", + "0", + "0", + "0", + "0", + "2", + "5", + "6", + "0", + "0", + "0", + "0", + "1", + "4", + "0", + "0", + "0" + ], + "type": "INTS" + }, + { + "name": "nodes_missing_value_tracks_true", + "ints": [ + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0" + ], + "type": "INTS" + }, + { + "name": "target_treeids", + "ints": [ + "0", + "0", + "0", + "0", + "1", + "1", + "1", + "1", + "2", + "2", + "2" + ], + "type": "INTS" + }, + { + "name": "target_nodeids", + "ints": [ + "3", + "4", + "5", + "6", + "3", + "4", + "5", + "6", + "2", + "3", + "4" + ], + "type": "INTS" + }, + { + "name": "target_ids", + "ints": [ + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0" + ], + "type": "INTS" + }, + { + "name": "target_weights", + "floats": [ + 349.8905, + 384.05722, + 378.0647, + 408.230774, + -25.229887, + 2.39983654, + -1.27583647, + 25.22761, + -13.8603878, + -3.890484, + 16.6648388 + ], + "type": "FLOATS" + } + ], + "domain": "ai.onnx.ml" + }, + { + "input": [ + "FeatureVector0" + ], + "output": [ + "FeatureVector1" + ], + "name": "Identity", + "opType": "Identity" + }, + { + "input": [ + "Target" + ], + "output": [ + "Target0" + ], + "name": "Identity0", + "opType": "Identity" + }, + { + "input": [ + "Score" + ], + "output": [ + "Score0" + ], + "name": "Identity1", + "opType": "Identity" + } + ], + "name": "model", + "input": [ + { + "name": "FeatureVector", + "type": { + "tensorType": { + "elemType": "FLOAT", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "11" + } + ] + } + } + } + }, + { + "name": "Target", + "type": { + "tensorType": { + "elemType": "FLOAT", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "1" + } + ] + } + } + } + } + ], + "output": [ + { + "name": "FeatureVector1", + "type": { + "tensorType": { + "elemType": "FLOAT", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "11" + } + ] + } + } + } + }, + { + "name": "Target0", + "type": { + "tensorType": { + "elemType": "FLOAT", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "1" + } + ] + } + } + } + }, + { + "name": "Score0", + "type": { + "tensorType": { + "elemType": "FLOAT", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "1" + } + ] + } + } + } + } + ], + 
"valueInfo": [ + { + "name": "FeatureVector0", + "type": { + "tensorType": { + "elemType": "FLOAT", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "11" + } + ] + } + } + } + }, + { + "name": "Score", + "type": { + "tensorType": { + "elemType": "FLOAT", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "1" + } + ] + } + } + } + } + ] + }, + "opsetImport": [ + { + "domain": "ai.onnx.ml", + "version": "1" + }, + { + "version": "7" + } + ] +} \ No newline at end of file diff --git a/test/BaselineOutput/Common/Onnx/BinaryClassification/BreastCancer/BinaryClassificationLRSaveModelToOnnxTest.json b/test/BaselineOutput/Common/Onnx/BinaryClassification/BreastCancer/LogisticRegressionSaveModelToOnnxTest.txt similarity index 68% rename from test/BaselineOutput/Common/Onnx/BinaryClassification/BreastCancer/BinaryClassificationLRSaveModelToOnnxTest.json rename to test/BaselineOutput/Common/Onnx/BinaryClassification/BreastCancer/LogisticRegressionSaveModelToOnnxTest.txt index 217e7b1fbb..30c26b8f90 100644 --- a/test/BaselineOutput/Common/Onnx/BinaryClassification/BreastCancer/BinaryClassificationLRSaveModelToOnnxTest.json +++ b/test/BaselineOutput/Common/Onnx/BinaryClassification/BreastCancer/LogisticRegressionSaveModelToOnnxTest.txt @@ -2,15 +2,15 @@ "irVersion": "3", "producerName": "ML.NET", "producerVersion": "##VERSION##", - "domain": "Onnx", + "domain": "machinelearning.dotnet", "graph": { "node": [ { "input": [ - "Features" + "FeatureVector" ], "output": [ - "Features0" + "FeatureVector0" ], "name": "Scaler", "opType": "Scaler", @@ -26,6 +26,8 @@ 0, 0, 0, + 0, + 0, 0 ], "type": "FLOATS" @@ -33,15 +35,17 @@ { "name": "scale", "floats": [ - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1 + 0.2544529, + 0.3184713, + 0.259740263, + 0.324675322, + 0.3144654, + 0.332225919, + 0.3436426, + 0.321543425, + 0.30864197, + 0.3154574, + 0.344827563 ], "type": "FLOATS" } @@ -50,7 +54,7 @@ }, { "input": [ - "Features0" + "FeatureVector0" ], "output": [ "Score" @@ -71,22 +75,24 @@ { "name": "coefficients", "floats": [ - 2.6596148, - 1.67937, - 1.94177353, - 1.42409551, - 0.852847636, - 2.93048549, - 1.74959826, - 1.58030283, - 0.5948697 + 300.146545, + -0.224054649, + 286.670166, + -0.8098665, + -0.8306167, + -0.9863483, + 55.934185, + 12.6538782, + -0.265024453, + 0.6916245, + 238.776855 ], "type": "FLOATS" }, { "name": "intercepts", "floats": [ - -6.183617 + 373.62085 ], "type": "FLOATS" } @@ -95,89 +101,57 @@ }, { "input": [ - "Score" - ], - "output": [ - "linearOutput" - ], - "name": "Affine", - "opType": "Affine", - "attribute": [ - { - "name": "alpha", - "f": 1, - "type": "FLOAT" - }, - { - "name": "beta", - "f": -1E-07, - "type": "FLOAT" - } - ] - }, - { - "input": [ - "linearOutput" - ], - "output": [ - "Probability" - ], - "name": "Sigmoid", - "opType": "Sigmoid" - }, - { - "input": [ - "Probability" - ], - "output": [ - "PredictedLabel" - ], - "name": "Binarizer", - "opType": "Binarizer", - "attribute": [ - { - "name": "threshold", - "f": 0.5, - "type": "FLOAT" - } - ], - "domain": "ai.onnx.ml" - }, - { - "input": [ - "PredictedLabel" + "FeatureVector0" ], "output": [ - "PredictedLabel0" + "FeatureVector1" ], "name": "Identity", "opType": "Identity" }, { "input": [ - "Score" + "Target" ], "output": [ - "Score0" + "Target0" ], "name": "Identity0", "opType": "Identity" }, { "input": [ - "Probability" + "Score" ], "output": [ - "Probability0" + "Score0" ], "name": "Identity1", "opType": "Identity" } ], - "name": 
"BinaryClassificationLRSaveModelToOnnxTest", + "name": "model", "input": [ { - "name": "Features", + "name": "FeatureVector", + "type": { + "tensorType": { + "elemType": "FLOAT", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "11" + } + ] + } + } + } + }, + { + "name": "Target", "type": { "tensorType": { "elemType": "FLOAT", @@ -187,7 +161,7 @@ "dimValue": "1" }, { - "dimValue": "9" + "dimValue": "1" } ] } @@ -197,7 +171,7 @@ ], "output": [ { - "name": "PredictedLabel0", + "name": "FeatureVector1", "type": { "tensorType": { "elemType": "FLOAT", @@ -207,7 +181,7 @@ "dimValue": "1" }, { - "dimValue": "1" + "dimValue": "11" } ] } @@ -215,7 +189,7 @@ } }, { - "name": "Score0", + "name": "Target0", "type": { "tensorType": { "elemType": "FLOAT", @@ -233,7 +207,7 @@ } }, { - "name": "Probability0", + "name": "Score0", "type": { "tensorType": { "elemType": "FLOAT", @@ -253,7 +227,25 @@ ], "valueInfo": [ { - "name": "Features0", + "name": "FeatureVector0", + "type": { + "tensorType": { + "elemType": "FLOAT", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "11" + } + ] + } + } + } + }, + { + "name": "Score", "type": { "tensorType": { "elemType": "FLOAT", @@ -263,7 +255,7 @@ "dimValue": "1" }, { - "dimValue": "9" + "dimValue": "1" } ] } diff --git a/test/BaselineOutput/Common/Onnx/BinaryClassification/BreastCancer/ModelWithLessIO.txt b/test/BaselineOutput/Common/Onnx/BinaryClassification/BreastCancer/ModelWithLessIO.txt new file mode 100644 index 0000000000..b52cce8e97 --- /dev/null +++ b/test/BaselineOutput/Common/Onnx/BinaryClassification/BreastCancer/ModelWithLessIO.txt @@ -0,0 +1,906 @@ +{ + "irVersion": "3", + "producerName": "ML.NET", + "producerVersion": "##VERSION##", + "domain": "machinelearning.dotnet", + "graph": { + "node": [ + { + "input": [ + "F2" + ], + "output": [ + "F20" + ], + "name": "LabelEncoder", + "opType": "LabelEncoder", + "attribute": [ + { + "name": "classes_strings", + "strings": [ + "MQ==", + "NQ==", + "NA==", + "Mg==", + "Mw==", + "Nw==", + "MTA=", + "OA==", + "Ng==" + ], + "type": "STRINGS" + }, + { + "name": "default_int64", + "i": "-1", + "type": "INT" + }, + { + "name": "default_string", + "s": "IA==", + "type": "STRING" + } + ], + "domain": "ai.onnx.ml" + }, + { + "input": [ + "F20" + ], + "output": [ + "F21" + ], + "name": "OneHotEncoder", + "opType": "OneHotEncoder", + "attribute": [ + { + "name": "cats_int64s", + "ints": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8" + ], + "type": "INTS" + }, + { + "name": "zeros", + "i": "1", + "type": "INT" + } + ], + "domain": "ai.onnx.ml" + }, + { + "input": [ + "F1", + "F21" + ], + "output": [ + "Features" + ], + "name": "FeatureVectorizer", + "opType": "FeatureVectorizer", + "attribute": [ + { + "name": "inputdimensions", + "ints": [ + "8", + "9" + ], + "type": "INTS" + } + ], + "domain": "ai.onnx.ml" + }, + { + "input": [ + "Features" + ], + "output": [ + "Score" + ], + "name": "TreeEnsembleRegressor", + "opType": "TreeEnsembleRegressor", + "attribute": [ + { + "name": "post_transform", + "s": "Tk9ORQ==", + "type": "STRING" + }, + { + "name": "n_targets", + "i": "1", + "type": "INT" + }, + { + "name": "base_values", + "floats": [ + 0 + ], + "type": "FLOATS" + }, + { + "name": "aggregate_function", + "s": "U1VN", + "type": "STRING" + }, + { + "name": "nodes_treeids", + "ints": [ + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "1", + "1", + "1", + "1", + "1", + "1", + "1", + "1", + "1", + "1", + "1", + "1", 
+ "1", + "1", + "1", + "2", + "2", + "2", + "2", + "2", + "2", + "2", + "2", + "2", + "2", + "2", + "2", + "2", + "2", + "2" + ], + "type": "INTS" + }, + { + "name": "nodes_nodeids", + "ints": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14" + ], + "type": "INTS" + }, + { + "name": "nodes_featureids", + "ints": [ + "1", + "2", + "5", + "1", + "5", + "5", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "1", + "2", + "6", + "1", + "5", + "4", + "5", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "1", + "5", + "7", + "3", + "0", + "6", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0" + ], + "type": "INTS" + }, + { + "name": "nodes_modes", + "strings": [ + "QlJBTkNIX0xFUQ==", + "QlJBTkNIX0xFUQ==", + "QlJBTkNIX0xFUQ==", + "QlJBTkNIX0xFUQ==", + "QlJBTkNIX0xFUQ==", + "QlJBTkNIX0xFUQ==", + "QlJBTkNIX0xFUQ==", + "TEVBRg==", + "TEVBRg==", + "TEVBRg==", + "TEVBRg==", + "TEVBRg==", + "TEVBRg==", + "TEVBRg==", + "TEVBRg==", + "QlJBTkNIX0xFUQ==", + "QlJBTkNIX0xFUQ==", + "QlJBTkNIX0xFUQ==", + "QlJBTkNIX0xFUQ==", + "QlJBTkNIX0xFUQ==", + "QlJBTkNIX0xFUQ==", + "QlJBTkNIX0xFUQ==", + "TEVBRg==", + "TEVBRg==", + "TEVBRg==", + "TEVBRg==", + "TEVBRg==", + "TEVBRg==", + "TEVBRg==", + "TEVBRg==", + "QlJBTkNIX0xFUQ==", + "QlJBTkNIX0xFUQ==", + "QlJBTkNIX0xFUQ==", + "QlJBTkNIX0xFUQ==", + "QlJBTkNIX0xFUQ==", + "QlJBTkNIX0xFUQ==", + "QlJBTkNIX0xFUQ==", + "TEVBRg==", + "TEVBRg==", + "TEVBRg==", + "TEVBRg==", + "TEVBRg==", + "TEVBRg==", + "TEVBRg==", + "TEVBRg==" + ], + "type": "STRINGS" + }, + { + "name": "nodes_values", + "floats": [ + 2.5, + 2.5, + 4.5, + 4.5, + 2.5, + 1.5, + 6.5, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 2.5, + 2.5, + 4.5, + 4.5, + 2.5, + 2.5, + 3.5, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 3.5, + 5.5, + 3.5, + 5.5, + 6.5, + 4.5, + 5.5, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "type": "FLOATS" + }, + { + "name": "nodes_truenodeids", + "ints": [ + "2", + "5", + "7", + "4", + "9", + "8", + "12", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "2", + "5", + "6", + "4", + "9", + "8", + "7", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "1", + "2", + "6", + "4", + "5", + "8", + "7", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0" + ], + "type": "INTS" + }, + { + "name": "nodes_falsenodeids", + "ints": [ + "1", + "3", + "10", + "11", + "6", + "13", + "14", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "1", + "3", + "10", + "11", + "12", + "13", + "14", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "3", + "9", + "10", + "11", + "12", + "13", + "14", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0" + ], + "type": "INTS" + }, + { + "name": "nodes_missing_value_tracks_true", + "ints": [ + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0" + ], + "type": "INTS" + }, + { + "name": "target_treeids", + "ints": [ + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "1", + "1", + "1", + "1", + "1", + "1", + "1", + "1", + "2", + "2", + "2", + "2", + "2", + "2", + "2", + "2" + 
], + "type": "INTS" + }, + { + "name": "target_nodeids", + "ints": [ + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14" + ], + "type": "INTS" + }, + { + "name": "target_ids", + "ints": [ + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0", + "0" + ], + "type": "INTS" + }, + { + "name": "target_weights", + "floats": [ + -0.9850374, + -1, + -0.428571433, + 0.05882353, + 0.9655172, + 0.478260875, + 7.006492E-45, + 0.9354839, + -0.837172, + -0.896625638, + -0.3455931, + 0.223126009, + 0.8040303, + 0.60825175, + -0.06932944, + -0.402043074, + -0.7417274, + -0.408434927, + 0.7105746, + 0.1875386, + 0.7631735, + 0.706173241, + 0.625906467, + -0.35968104 + ], + "type": "FLOATS" + } + ], + "domain": "ai.onnx.ml" + }, + { + "input": [ + "Score" + ], + "output": [ + "linearOutput" + ], + "name": "Affine", + "opType": "Affine", + "attribute": [ + { + "name": "alpha", + "f": 0.4, + "type": "FLOAT" + }, + { + "name": "beta", + "f": -1E-07, + "type": "FLOAT" + } + ] + }, + { + "input": [ + "linearOutput" + ], + "output": [ + "Probability" + ], + "name": "Sigmoid", + "opType": "Sigmoid" + }, + { + "input": [ + "Probability" + ], + "output": [ + "PredictedLabel" + ], + "name": "Binarizer", + "opType": "Binarizer", + "attribute": [ + { + "name": "threshold", + "f": 0.5, + "type": "FLOAT" + } + ], + "domain": "ai.onnx.ml" + }, + { + "input": [ + "PredictedLabel" + ], + "output": [ + "PredictedLabel0" + ], + "name": "Identity", + "opType": "Identity" + }, + { + "input": [ + "Score" + ], + "output": [ + "Score0" + ], + "name": "Identity0", + "opType": "Identity" + }, + { + "input": [ + "Probability" + ], + "output": [ + "Probability0" + ], + "name": "Identity1", + "opType": "Identity" + } + ], + "name": "modelWithLessIO", + "input": [ + { + "name": "F1", + "type": { + "tensorType": { + "elemType": "FLOAT", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "8" + } + ] + } + } + } + }, + { + "name": "F2", + "type": { + "tensorType": { + "elemType": "STRING", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "1" + } + ] + } + } + } + } + ], + "output": [ + { + "name": "PredictedLabel0", + "type": { + "tensorType": { + "elemType": "FLOAT", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "1" + } + ] + } + } + } + }, + { + "name": "Score0", + "type": { + "tensorType": { + "elemType": "FLOAT", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "1" + } + ] + } + } + } + }, + { + "name": "Probability0", + "type": { + "tensorType": { + "elemType": "FLOAT", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "1" + } + ] + } + } + } + } + ], + "valueInfo": [ + { + "name": "F20", + "type": { + "tensorType": { + "elemType": "INT64", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "1" + } + ] + } + } + } + }, + { + "name": "F21", + "type": { + "tensorType": { + "elemType": "FLOAT", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "9" + } + ] + } + } + } + }, + { + "name": "Features", + "type": { + "tensorType": { + "elemType": "FLOAT", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "17" + } + ] + } + } + } + } + ] + }, + "opsetImport": [ + { + "domain": "ai.onnx.ml", + "version": "1" + }, + { + "version": "7" + } + ] +} \ No newline at 
end of file diff --git a/test/BaselineOutput/Common/Onnx/BinaryClassification/BreastCancer/KeyToVectorBag.json b/test/BaselineOutput/Common/Onnx/BinaryClassification/BreastCancer/OneHotBagPipeline.txt similarity index 74% rename from test/BaselineOutput/Common/Onnx/BinaryClassification/BreastCancer/KeyToVectorBag.json rename to test/BaselineOutput/Common/Onnx/BinaryClassification/BreastCancer/OneHotBagPipeline.txt index aa498a07ad..d66e9291fe 100644 --- a/test/BaselineOutput/Common/Onnx/BinaryClassification/BreastCancer/KeyToVectorBag.json +++ b/test/BaselineOutput/Common/Onnx/BinaryClassification/BreastCancer/OneHotBagPipeline.txt @@ -2,7 +2,7 @@ "irVersion": "3", "producerName": "ML.NET", "producerVersion": "##VERSION##", - "domain": "Onnx", + "domain": "machinelearning.dotnet", "graph": { "node": [ { @@ -103,9 +103,34 @@ }, { "input": [ - "F1", "F21" ], + "output": [ + "F22" + ], + "name": "Imputer", + "opType": "Imputer", + "attribute": [ + { + "name": "replaced_value_float", + "f": "NaN", + "type": "FLOAT" + }, + { + "name": "imputed_value_floats", + "floats": [ + 0 + ], + "type": "FLOATS" + } + ], + "domain": "ai.onnx.ml" + }, + { + "input": [ + "F1", + "F22" + ], "output": [ "Features" ], @@ -312,6 +337,46 @@ ], "domain": "ai.onnx.ml" }, + { + "input": [ + "Label" + ], + "output": [ + "Label0" + ], + "name": "Identity", + "opType": "Identity" + }, + { + "input": [ + "F1" + ], + "output": [ + "F10" + ], + "name": "Identity0", + "opType": "Identity" + }, + { + "input": [ + "F22" + ], + "output": [ + "F23" + ], + "name": "Identity1", + "opType": "Identity" + }, + { + "input": [ + "Features" + ], + "output": [ + "Features0" + ], + "name": "Identity2", + "opType": "Identity" + }, { "input": [ "PredictedLabel" @@ -319,7 +384,7 @@ "output": [ "PredictedLabel0" ], - "name": "Identity", + "name": "Identity3", "opType": "Identity" }, { @@ -329,7 +394,7 @@ "output": [ "Score0" ], - "name": "Identity0", + "name": "Identity4", "opType": "Identity" }, { @@ -339,12 +404,30 @@ "output": [ "Probability0" ], - "name": "Identity1", + "name": "Identity5", "opType": "Identity" } ], - "name": "KeyToVectorBag", + "name": "model", "input": [ + { + "name": "Label", + "type": { + "tensorType": { + "elemType": "FLOAT", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "1" + } + ] + } + } + } + }, { "name": "F1", "type": { @@ -383,6 +466,78 @@ } ], "output": [ + { + "name": "Label0", + "type": { + "tensorType": { + "elemType": "FLOAT", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "1" + } + ] + } + } + } + }, + { + "name": "F10", + "type": { + "tensorType": { + "elemType": "FLOAT", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "1" + } + ] + } + } + } + }, + { + "name": "F23", + "type": { + "tensorType": { + "elemType": "FLOAT", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "10" + } + ] + } + } + } + }, + { + "name": "Features0", + "type": { + "tensorType": { + "elemType": "FLOAT", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "11" + } + ] + } + } + } + }, { "name": "PredictedLabel0", "type": { @@ -475,6 +630,24 @@ } } }, + { + "name": "F22", + "type": { + "tensorType": { + "elemType": "FLOAT", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "10" + } + ] + } + } + } + }, { "name": "Features", "type": { diff --git a/test/BaselineOutput/Common/Onnx/Cluster/BreastCancer/Kmeans.json b/test/BaselineOutput/Common/Onnx/Cluster/BreastCancer/Kmeans.txt similarity index 85% rename from 
test/BaselineOutput/Common/Onnx/Cluster/BreastCancer/Kmeans.json rename to test/BaselineOutput/Common/Onnx/Cluster/BreastCancer/Kmeans.txt index d74ebe1c3f..8c832f9db8 100644 --- a/test/BaselineOutput/Common/Onnx/Cluster/BreastCancer/Kmeans.json +++ b/test/BaselineOutput/Common/Onnx/Cluster/BreastCancer/Kmeans.txt @@ -2,7 +2,7 @@ "irVersion": "3", "producerName": "ML.NET", "producerVersion": "##VERSION##", - "domain": "Onnx", + "domain": "machinelearning.dotnet", "graph": { "node": [ { @@ -157,44 +157,64 @@ "opType": "Identity" } ], - "name": "Kmeans", + "name": "model", "initializer": [ { "dims": [ - "2", + "4", "9" ], "dataType": "FLOAT", "floatData": [ - 0.5522167, - 0.3039403, - 0.319211155, - 0.261575729, - 0.320196062, - 0.344088882, - 0.293349, - 0.273151934, - 0.15763472, - 0.285144627, - 0.332245946, - 0.325724274, - 0.315217048, - 0.328623, - 0.3706516, - 0.41992715, - 0.307970464, - 0.164492577 + 0.625973761, + 0.424676031, + 0.4348058, + 0.378701448, + 0.395843625, + 0.489611953, + 0.414545476, + 0.407013685, + 0.193505809, + 0.403125, + 0.684375, + 0.6375, + 0.50625, + 0.478124976, + 0.665625036, + 0.784374952, + 0.349999964, + 0.196874976, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0.184905529, + 0.112075679, + 0.119622864, + 0.117736049, + 0.200000286, + 0.1215097, + 0.189811155, + 0.10566061, + 0.107924782 ], "name": "C" }, { "dims": [ - "2" + "4" ], "dataType": "FLOAT", "floatData": [ - 0.9740776, - 0.940771043 + 1.67602837, + 2.74173832, + 0, + 0.188527346 ], "name": "C2" }, @@ -277,7 +297,7 @@ "dimValue": "1" }, { - "dimValue": "2" + "dimValue": "4" } ] } diff --git a/test/BaselineOutput/Common/Onnx/MultiClassClassification/BreastCancer/MultiClassificationLogisticRegressionSaveModelToOnnxTest.txt b/test/BaselineOutput/Common/Onnx/MultiClassClassification/BreastCancer/MultiClassificationLogisticRegressionSaveModelToOnnxTest.txt new file mode 100644 index 0000000000..c614821800 --- /dev/null +++ b/test/BaselineOutput/Common/Onnx/MultiClassClassification/BreastCancer/MultiClassificationLogisticRegressionSaveModelToOnnxTest.txt @@ -0,0 +1,432 @@ +{ + "irVersion": "3", + "producerName": "ML.NET", + "producerVersion": "##VERSION##", + "domain": "machinelearning.dotnet", + "graph": { + "node": [ + { + "input": [ + "Features" + ], + "output": [ + "Features0" + ], + "name": "Scaler", + "opType": "Scaler", + "attribute": [ + { + "name": "offset", + "floats": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "type": "FLOATS" + }, + { + "name": "scale", + "floats": [ + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1 + ], + "type": "FLOATS" + } + ], + "domain": "ai.onnx.ml" + }, + { + "input": [ + "Label" + ], + "output": [ + "Label0" + ], + "name": "LabelEncoder", + "opType": "LabelEncoder", + "attribute": [ + { + "name": "classes_strings", + "strings": [ + "NQ==", + "Mw==", + "Ng==", + "NA==", + "OA==", + "MQ==", + "Mg==", + "Nw==", + "MTA=", + "OQ==" + ], + "type": "STRINGS" + }, + { + "name": "default_int64", + "i": "-1", + "type": "INT" + }, + { + "name": "default_string", + "s": "IA==", + "type": "STRING" + } + ], + "domain": "ai.onnx.ml" + }, + { + "input": [ + "Features0" + ], + "output": [ + "PredictedLabel", + "Score" + ], + "name": "LinearClassifier", + "opType": "LinearClassifier", + "attribute": [ + { + "name": "post_transform", + "s": "Tk9ORQ==", + "type": "STRING" + }, + { + "name": "multi_class", + "i": "1", + "type": "INT" + }, + { + "name": "coefficients", + "floats": [ + -0.0871891156, + 0.209310874, + 0.747134566, + 0.364765137, + -0.377612084, + 
-0.6847462, + 0, + -0.5566554, + -0.3849638, + -1.29262471, + 0, + 0, + -0.479907274, + -0.08740093, + -0.5489706, + 0, + 0.630316138, + 0, + 0, + 0, + 0.07319626, + 0.171390951, + 0.6936194, + 0, + 0, + -0.6189027, + 0, + -0.732489467, + -0.71812433, + 0.2614429, + -0.4669126, + -0.250123739, + 1.01838875, + 0.7936676, + 0, + 0, + 0.8072781, + 0, + 0.833407462, + 0, + -1.67462111, + -1.19559848, + -0.553805768, + -0.5710498, + -0.7325714, + -0.5470721, + -0.7483947, + 0, + -0.5655844, + -0.9892823, + -0.237264976, + 0, + -0.81984, + -0.0930810943, + -0.4526821, + 0, + 0, + 0, + 0.726712048, + 0, + 1.12171924, + 0.323810369, + 0.245762676, + 0.07872447, + 0.939905643, + 0.923160553, + 0, + 0, + 1.10209334, + 0.704743862, + 0, + 0.322121173, + 0.5064917, + 1.30212963, + 0, + 0.8623323, + 0.0155395176, + 0, + 0.192209348, + 0.262786478 + ], + "type": "FLOATS" + }, + { + "name": "intercepts", + "floats": [ + 1.23585367, + 1.68783426, + -0.8096311, + 1.35599542, + -1.59806383, + 2.57355452, + 1.03064489, + -1.67592752, + -1.40655541, + -2.39366078 + ], + "type": "FLOATS" + }, + { + "name": "classlabels_ints", + "ints": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9" + ], + "type": "INTS" + } + ], + "domain": "ai.onnx.ml" + }, + { + "input": [ + "Label0" + ], + "output": [ + "Label1" + ], + "name": "Identity", + "opType": "Identity" + }, + { + "input": [ + "Features0" + ], + "output": [ + "Features1" + ], + "name": "Identity0", + "opType": "Identity" + }, + { + "input": [ + "PredictedLabel" + ], + "output": [ + "PredictedLabel0" + ], + "name": "Identity1", + "opType": "Identity" + }, + { + "input": [ + "Score" + ], + "output": [ + "Score0" + ], + "name": "Identity2", + "opType": "Identity" + } + ], + "name": "model", + "input": [ + { + "name": "Label", + "type": { + "tensorType": { + "elemType": "STRING", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "1" + } + ] + } + } + } + }, + { + "name": "Features", + "type": { + "tensorType": { + "elemType": "FLOAT", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "8" + } + ] + } + } + } + } + ], + "output": [ + { + "name": "Label1", + "type": { + "tensorType": { + "elemType": "INT64", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "1" + } + ] + } + } + } + }, + { + "name": "Features1", + "type": { + "tensorType": { + "elemType": "FLOAT", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "8" + } + ] + } + } + } + }, + { + "name": "PredictedLabel0", + "type": { + "tensorType": { + "elemType": "INT64", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "1" + } + ] + } + } + } + }, + { + "name": "Score0", + "type": { + "tensorType": { + "elemType": "FLOAT", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "10" + } + ] + } + } + } + } + ], + "valueInfo": [ + { + "name": "Features0", + "type": { + "tensorType": { + "elemType": "FLOAT", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "8" + } + ] + } + } + } + }, + { + "name": "Label0", + "type": { + "tensorType": { + "elemType": "INT64", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "1" + } + ] + } + } + } + } + ] + }, + "opsetImport": [ + { + "domain": "ai.onnx.ml", + "version": "1" + }, + { + "version": "7" + } + ] +} \ No newline at end of file diff --git a/test/BaselineOutput/Common/Onnx/MultiClassClassification/BreastCancer/MultiClassificationLRSaveModelToOnnxTest.json 
b/test/BaselineOutput/Common/Onnx/Regression/Adult/SimplePipeline.txt similarity index 55% rename from test/BaselineOutput/Common/Onnx/MultiClassClassification/BreastCancer/MultiClassificationLRSaveModelToOnnxTest.json rename to test/BaselineOutput/Common/Onnx/Regression/Adult/SimplePipeline.txt index f7976875f1..30c26b8f90 100644 --- a/test/BaselineOutput/Common/Onnx/MultiClassClassification/BreastCancer/MultiClassificationLRSaveModelToOnnxTest.json +++ b/test/BaselineOutput/Common/Onnx/Regression/Adult/SimplePipeline.txt @@ -2,15 +2,15 @@ "irVersion": "3", "producerName": "ML.NET", "producerVersion": "##VERSION##", - "domain": "Onnx", + "domain": "machinelearning.dotnet", "graph": { "node": [ { "input": [ - "Features" + "FeatureVector" ], "output": [ - "Features0" + "FeatureVector0" ], "name": "Scaler", "opType": "Scaler", @@ -26,6 +26,8 @@ 0, 0, 0, + 0, + 0, 0 ], "type": "FLOATS" @@ -33,15 +35,17 @@ { "name": "scale", "floats": [ - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1 + 0.2544529, + 0.3184713, + 0.259740263, + 0.324675322, + 0.3144654, + 0.332225919, + 0.3436426, + 0.321543425, + 0.30864197, + 0.3154574, + 0.344827563 ], "type": "FLOATS" } @@ -50,14 +54,13 @@ }, { "input": [ - "Features0" + "FeatureVector0" ], "output": [ - "PredictedLabel", "Score" ], - "name": "LinearClassifier", - "opType": "LinearClassifier", + "name": "LinearRegressor", + "opType": "LinearRegressor", "attribute": [ { "name": "post_transform", @@ -65,63 +68,57 @@ "type": "STRING" }, { - "name": "multi_class", + "name": "targets", "i": "1", "type": "INT" }, { "name": "coefficients", "floats": [ - -1.58059466, - -0.82541883, - -1.05039084, - -0.792811334, - -0.385914773, - -1.59029973, - -1.01633251, - -0.8349969, - -0.3322066, - 1.58059633, - 0.8254174, - 1.05039155, - 0.7928113, - 0.385914057, - 1.59029937, - 1.01633251, - 0.8349978, - 0.332206637 + 300.146545, + -0.224054649, + 286.670166, + -0.8098665, + -0.8306167, + -0.9863483, + 55.934185, + 12.6538782, + -0.265024453, + 0.6916245, + 238.776855 ], "type": "FLOATS" }, { "name": "intercepts", "floats": [ - 3.36230779, - -3.36230469 + 373.62085 ], "type": "FLOATS" - }, - { - "name": "classlabels_ints", - "ints": [ - "0", - "1" - ], - "type": "INTS" } ], "domain": "ai.onnx.ml" }, { "input": [ - "PredictedLabel" + "FeatureVector0" ], "output": [ - "PredictedLabel0" + "FeatureVector1" ], "name": "Identity", "opType": "Identity" }, + { + "input": [ + "Target" + ], + "output": [ + "Target0" + ], + "name": "Identity0", + "opType": "Identity" + }, { "input": [ "Score" @@ -129,14 +126,14 @@ "output": [ "Score0" ], - "name": "Identity0", + "name": "Identity1", "opType": "Identity" } ], - "name": "MultiClassificationLRSaveModelToOnnxTest", + "name": "model", "input": [ { - "name": "Features", + "name": "FeatureVector", "type": { "tensorType": { "elemType": "FLOAT", @@ -146,7 +143,25 @@ "dimValue": "1" }, { - "dimValue": "9" + "dimValue": "11" + } + ] + } + } + } + }, + { + "name": "Target", + "type": { + "tensorType": { + "elemType": "FLOAT", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "1" } ] } @@ -156,10 +171,28 @@ ], "output": [ { - "name": "PredictedLabel0", + "name": "FeatureVector1", + "type": { + "tensorType": { + "elemType": "FLOAT", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "11" + } + ] + } + } + } + }, + { + "name": "Target0", "type": { "tensorType": { - "elemType": "INT64", + "elemType": "FLOAT", "shape": { "dim": [ { @@ -184,7 +217,7 @@ "dimValue": "1" }, { - "dimValue": "2" + "dimValue": 
"1" } ] } @@ -194,7 +227,7 @@ ], "valueInfo": [ { - "name": "Features0", + "name": "FeatureVector0", "type": { "tensorType": { "elemType": "FLOAT", @@ -204,7 +237,25 @@ "dimValue": "1" }, { - "dimValue": "9" + "dimValue": "11" + } + ] + } + } + } + }, + { + "name": "Score", + "type": { + "tensorType": { + "elemType": "FLOAT", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "1" } ] } diff --git a/test/BaselineOutput/Common/Onnx/WordEmbeddings/WordEmbeddings.json b/test/BaselineOutput/Common/Onnx/Transforms/Sentiment/SmallWordEmbed.txt similarity index 96% rename from test/BaselineOutput/Common/Onnx/WordEmbeddings/WordEmbeddings.json rename to test/BaselineOutput/Common/Onnx/Transforms/Sentiment/SmallWordEmbed.txt index 923c1519bb..5b1a98942b 100644 --- a/test/BaselineOutput/Common/Onnx/WordEmbeddings/WordEmbeddings.json +++ b/test/BaselineOutput/Common/Onnx/Transforms/Sentiment/SmallWordEmbed.txt @@ -2,12 +2,12 @@ "irVersion": "3", "producerName": "ML.NET", "producerVersion": "##VERSION##", - "domain": "Onnx", + "domain": "machinelearning.dotnet", "graph": { "node": [ { "input": [ - "Cat" + "Tokens" ], "output": [ "LabelEncodedInput" @@ -328,7 +328,7 @@ "MaxWeights" ], "output": [ - "Cat0" + "Embed" ], "name": "Concat", "opType": "Concat", @@ -342,16 +342,26 @@ }, { "input": [ - "Cat0" + "Tokens" ], "output": [ - "Cat1" + "Tokens0" ], "name": "Identity", "opType": "Identity" + }, + { + "input": [ + "Embed" + ], + "output": [ + "Embed0" + ], + "name": "Identity0", + "opType": "Identity" } ], - "name": "WordEmbeddings", + "name": "model", "initializer": [ { "dims": [ @@ -1023,7 +1033,7 @@ ], "input": [ { - "name": "Cat", + "name": "Tokens", "type": { "tensorType": { "elemType": "STRING", @@ -1043,7 +1053,25 @@ ], "output": [ { - "name": "Cat1", + "name": "Tokens0", + "type": { + "tensorType": { + "elemType": "STRING", + "shape": { + "dim": [ + { + "dimValue": "1" + }, + { + "dimValue": "4" + } + ] + } + } + } + }, + { + "name": "Embed0", "type": { "tensorType": { "elemType": "FLOAT", @@ -1063,7 +1091,7 @@ ], "valueInfo": [ { - "name": "Cat0", + "name": "Embed", "type": { "tensorType": { "elemType": "FLOAT", diff --git a/test/Microsoft.ML.Benchmarks/CacheDataViewBench.cs b/test/Microsoft.ML.Benchmarks/CacheDataViewBench.cs index c2b1de8959..0888c5ea8c 100644 --- a/test/Microsoft.ML.Benchmarks/CacheDataViewBench.cs +++ b/test/Microsoft.ML.Benchmarks/CacheDataViewBench.cs @@ -14,9 +14,10 @@ public class CacheDataViewBench // Global. private IDataView _cacheDataView; - // Per iteration. private RowCursor _cursor; - private ValueGetter _getter; + private ValueGetter _seekerGetter; + private ValueGetter _cursorGetter; + private Schema.Column _col; private RowSeeker _seeker; private long[] _positions; @@ -57,30 +58,23 @@ public void Setup() var rand = new Random(0); for (int i = 0; i < _positions.Length; ++i) _positions[i] = rand.Next(Length); - } - [IterationSetup(Target = nameof(CacheWithCursor))] - public void CacheWithCursorSetup() - { - var col = _cacheDataView.Schema.GetColumnOrNull("A").Value; - _cursor = _cacheDataView.GetRowCursor(colIndex => colIndex == col.Index); - _getter = _cursor.GetGetter(col.Index); + _col = _cacheDataView.Schema.GetColumnOrNull("A").Value; + _seeker = ((IRowSeekable)_cacheDataView).GetSeeker(colIndex => colIndex == _col.Index); + _seekerGetter = _seeker.GetGetter(_col.Index); } [Benchmark] public void CacheWithCursor() { + // This setup takes very less time to execute as compared to the actual _cursorGetter. 
+ // The most preferable position for this setup will be in GlobalSetup. + _cursor = _cacheDataView.GetRowCursor(colIndex => colIndex == _col.Index); + _cursorGetter = _cursor.GetGetter(_col.Index); + int val = 0; while (_cursor.MoveNext()) - _getter(ref val); - } - - [IterationSetup(Target = nameof(CacheWithSeeker))] - public void CacheWithSeekerSetup() - { - var col = _cacheDataView.Schema.GetColumnOrNull("A").Value; - _seeker = ((IRowSeekable)_cacheDataView).GetSeeker(colIndex => colIndex == col.Index); - _getter = _seeker.GetGetter(col.Index); + _cursorGetter(ref val); } [Benchmark] @@ -90,7 +84,7 @@ public void CacheWithSeeker() foreach (long pos in _positions) { _seeker.MoveTo(pos); - _getter(ref val); + _seekerGetter(ref val); } } } diff --git a/test/Microsoft.ML.Benchmarks/LegacyPredictionEngineBench.cs b/test/Microsoft.ML.Benchmarks/LegacyPredictionEngineBench.cs deleted file mode 100644 index 24ee741b72..0000000000 --- a/test/Microsoft.ML.Benchmarks/LegacyPredictionEngineBench.cs +++ /dev/null @@ -1,109 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using BenchmarkDotNet.Attributes; -using Microsoft.ML.Legacy; -using Microsoft.ML.Legacy.Data; -using Microsoft.ML.Legacy.Trainers; -using Microsoft.ML.Legacy.Transforms; - -namespace Microsoft.ML.Benchmarks -{ -#pragma warning disable 612, 618 - public class LegacyPredictionEngineBench - { - private IrisData _irisExample; - private PredictionModel _irisModel; - - private SentimentData _sentimentExample; - private PredictionModel _sentimentModel; - - private BreastCancerData _breastCancerExample; - private PredictionModel _breastCancerModel; - - [GlobalSetup(Target = nameof(MakeIrisPredictions))] - public void SetupIrisPipeline() - { - _irisExample = new IrisData() - { - SepalLength = 3.3f, - SepalWidth = 1.6f, - PetalLength = 0.2f, - PetalWidth = 5.1f, - }; - - string _irisDataPath = Program.GetInvariantCultureDataPath("iris.txt"); - - var pipeline = new LearningPipeline(); - pipeline.Add(new TextLoader(_irisDataPath).CreateFrom(useHeader: true, separator: '\t')); - pipeline.Add(new ColumnConcatenator("Features", new[] { "SepalLength", "SepalWidth", "PetalLength", "PetalWidth" })); - pipeline.Add(new StochasticDualCoordinateAscentClassifier() { NumThreads = 1, ConvergenceTolerance = 1e-2f }); - - _irisModel = pipeline.Train(); - } - - [GlobalSetup(Target = nameof(MakeSentimentPredictions))] - public void SetupSentimentPipeline() - { - _sentimentExample = new SentimentData() - { - SentimentText = "Not a big fan of this." 
- }; - - string _sentimentDataPath = Program.GetInvariantCultureDataPath("wikipedia-detox-250-line-data.tsv"); - - var pipeline = new LearningPipeline(); - pipeline.Add(new TextLoader(_sentimentDataPath).CreateFrom(useHeader: true, separator: '\t')); - pipeline.Add(new TextFeaturizer("Features", "SentimentText")); - pipeline.Add(new StochasticDualCoordinateAscentBinaryClassifier() { NumThreads = 1, ConvergenceTolerance = 1e-2f }); - - _sentimentModel = pipeline.Train(); - } - - [GlobalSetup(Target = nameof(MakeBreastCancerPredictions))] - public void SetupBreastCancerPipeline() - { - _breastCancerExample = new BreastCancerData() - { - Features = new[] { 5f, 1f, 1f, 1f, 2f, 1f, 3f, 1f, 1f } - }; - - string _breastCancerDataPath = Program.GetInvariantCultureDataPath("breast-cancer.txt"); - - var pipeline = new LearningPipeline(); - pipeline.Add(new TextLoader(_breastCancerDataPath).CreateFrom(useHeader: false, separator: '\t')); - pipeline.Add(new StochasticDualCoordinateAscentBinaryClassifier() { NumThreads = 1, ConvergenceTolerance = 1e-2f }); - - _breastCancerModel = pipeline.Train(); - } - - [Benchmark] - public void MakeIrisPredictions() - { - for (int i = 0; i < 10000; i++) - { - _irisModel.Predict(_irisExample); - } - } - - [Benchmark] - public void MakeSentimentPredictions() - { - for (int i = 0; i < 10000; i++) - { - _sentimentModel.Predict(_sentimentExample); - } - } - - [Benchmark] - public void MakeBreastCancerPredictions() - { - for (int i = 0; i < 10000; i++) - { - _breastCancerModel.Predict(_breastCancerExample); - } - } - } -#pragma warning restore 612, 618 -} diff --git a/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj b/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj index 07323552c2..fb91bdde36 100644 --- a/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj +++ b/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj @@ -19,7 +19,6 @@ - @@ -45,8 +44,7 @@ external\%(Identity) - + PreserveNewest diff --git a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs index fe16701fe7..33939af0a8 100644 --- a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs +++ b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs @@ -7,22 +7,23 @@ using BenchmarkDotNet.Attributes; using BenchmarkDotNet.Engines; using Microsoft.ML.Data; -using Microsoft.ML.Legacy.Models; -using Microsoft.ML.Legacy.Trainers; -using Microsoft.ML.Legacy.Transforms; +using Microsoft.ML.Learners; using Microsoft.ML.Trainers; +using Microsoft.ML.Transforms; using Microsoft.ML.Transforms.Text; namespace Microsoft.ML.Benchmarks { -#pragma warning disable 612, 618 public class StochasticDualCoordinateAscentClassifierBench : WithExtraMetrics { private readonly string _dataPath = Program.GetInvariantCultureDataPath("iris.txt"); private readonly string _sentimentDataPath = Program.GetInvariantCultureDataPath("wikipedia-detox-250-line-data.tsv"); private readonly Consumer _consumer = new Consumer(); // BenchmarkDotNet utility type used to prevent dead code elimination + private readonly MLContext _env = new MLContext(seed: 1); + private readonly int[] _batchSizes = new int[] { 1, 2, 5 }; + private readonly IrisData _example = new IrisData() { SepalLength = 3.3f, @@ -31,37 +32,47 @@ public class StochasticDualCoordinateAscentClassifierBench : WithExtraMetrics PetalWidth = 5.1f, }; - private Legacy.PredictionModel _trainedModel; + 
private TransformerChain> _trainedModel; + private PredictionEngine _predictionEngine; private IrisData[][] _batches; - private ClassificationMetrics _metrics; + private MultiClassClassifierMetrics _metrics; protected override IEnumerable GetMetrics() { if (_metrics != null) yield return new Metric( - nameof(ClassificationMetrics.AccuracyMacro), + nameof(MultiClassClassifierMetrics.AccuracyMacro), _metrics.AccuracyMacro.ToString("0.##", CultureInfo.InvariantCulture)); } [Benchmark] - public Legacy.PredictionModel TrainIris() => Train(_dataPath); + public TransformerChain> TrainIris() => Train(_dataPath); - private Legacy.PredictionModel Train(string dataPath) + private TransformerChain> Train(string dataPath) { - var pipeline = new Legacy.LearningPipeline(); + var reader = new TextLoader(_env, + columns: new[] + { + new TextLoader.Column("Label", DataKind.R4, 0), + new TextLoader.Column("SepalLength", DataKind.R4, 1), + new TextLoader.Column("SepalWidth", DataKind.R4, 2), + new TextLoader.Column("PetalLength", DataKind.R4, 3), + new TextLoader.Column("PetalWidth", DataKind.R4, 4), + }, + hasHeader: true + ); - pipeline.Add(new Legacy.Data.TextLoader(dataPath).CreateFrom(useHeader: true)); - pipeline.Add(new ColumnConcatenator(outputColumn: "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")); + IDataView data = reader.Read(dataPath); - pipeline.Add(new StochasticDualCoordinateAscentClassifier()); + var pipeline = new ColumnConcatenatingEstimator(_env, "Features", new[] { "SepalLength", "SepalWidth", "PetalLength", "PetalWidth" }) + .Append(new SdcaMultiClassTrainer(_env, "Label", "Features")); - return pipeline.Train(); + return pipeline.Fit(data); } [Benchmark] public void TrainSentiment() { - var env = new MLContext(seed: 1); // Pipeline var arguments = new TextLoader.Arguments() { @@ -85,9 +96,9 @@ public void TrainSentiment() AllowQuoting = false, AllowSparse = false }; - var loader = env.Data.ReadFromTextFile(_sentimentDataPath, arguments); + var loader = _env.Data.ReadFromTextFile(_sentimentDataPath, arguments); - var text = TextFeaturizingEstimator.Create(env, + var text = TextFeaturizingEstimator.Create(_env, new TextFeaturizingEstimator.Arguments() { Column = new TextFeaturizingEstimator.Column @@ -103,7 +114,7 @@ public void TrainSentiment() WordFeatureExtractor = null, }, loader); - var trans = WordEmbeddingsExtractingTransformer.Create(env, + var trans = WordEmbeddingsExtractingTransformer.Create(_env, new WordEmbeddingsExtractingTransformer.Arguments() { Column = new WordEmbeddingsExtractingTransformer.Column[1] @@ -118,7 +129,7 @@ public void TrainSentiment() }, text); // Train - var trainer = new SdcaMultiClassTrainer(env, "Label", "Features", maxIterations: 20); + var trainer = new SdcaMultiClassTrainer(_env, "Label", "Features", maxIterations: 20); var predicted = trainer.Fit(trans); _consumer.Consume(predicted); } @@ -127,41 +138,49 @@ public void TrainSentiment() public void SetupPredictBenchmarks() { _trainedModel = Train(_dataPath); - _consumer.Consume(_trainedModel.Predict(_example)); + _predictionEngine = _trainedModel.CreatePredictionEngine(_env); + _consumer.Consume(_predictionEngine.Predict(_example)); + + var reader = new TextLoader(_env, + columns: new[] + { + new TextLoader.Column("Label", DataKind.R4, 0), + new TextLoader.Column("SepalLength", DataKind.R4, 1), + new TextLoader.Column("SepalWidth", DataKind.R4, 2), + new TextLoader.Column("PetalLength", DataKind.R4, 3), + new TextLoader.Column("PetalWidth", DataKind.R4, 4), + }, + hasHeader: 
true + ); - var testData = new Legacy.Data.TextLoader(_dataPath).CreateFrom(useHeader: true); - var evaluator = new ClassificationEvaluator(); - _metrics = evaluator.Evaluate(_trainedModel, testData); + IDataView testData = reader.Read(_dataPath); + IDataView scoredTestData = _trainedModel.Transform(testData); + var evaluator = new MultiClassClassifierEvaluator(_env, new MultiClassClassifierEvaluator.Arguments()); + _metrics = evaluator.Evaluate(scoredTestData, DefaultColumnNames.Label, DefaultColumnNames.Score, DefaultColumnNames.PredictedLabel); _batches = new IrisData[_batchSizes.Length][]; for (int i = 0; i < _batches.Length; i++) { var batch = new IrisData[_batchSizes[i]]; - _batches[i] = batch; for (int bi = 0; bi < batch.Length; bi++) { batch[bi] = _example; } + _batches[i] = batch; } } [Benchmark] - public float[] PredictIris() => _trainedModel.Predict(_example).PredictedLabels; + public float[] PredictIris() => _predictionEngine.Predict(_example).PredictedLabels; [Benchmark] - public void PredictIrisBatchOf1() => Consume(_trainedModel.Predict(_batches[0])); + public void PredictIrisBatchOf1() => _trainedModel.Transform(_env.CreateStreamingDataView(_batches[0])); [Benchmark] - public void PredictIrisBatchOf2() => Consume(_trainedModel.Predict(_batches[1])); + public void PredictIrisBatchOf2() => _trainedModel.Transform(_env.CreateStreamingDataView(_batches[1])); [Benchmark] - public void PredictIrisBatchOf5() => Consume(_trainedModel.Predict(_batches[2])); - - private void Consume(IEnumerable predictions) - { - foreach (var prediction in predictions) - _consumer.Consume(prediction); - } + public void PredictIrisBatchOf5() => _trainedModel.Transform(_env.CreateStreamingDataView(_batches[2])); } public class IrisData @@ -187,5 +206,4 @@ public class IrisPrediction [ColumnName("Score")] public float[] PredictedLabels; } -#pragma warning restore 612, 618 } diff --git a/test/Microsoft.ML.CodeAnalyzer.Tests/Helpers/DiagnosticVerifier.cs b/test/Microsoft.ML.CodeAnalyzer.Tests/Helpers/DiagnosticVerifier.cs index 8d68cb98ff..993c5f48a6 100644 --- a/test/Microsoft.ML.CodeAnalyzer.Tests/Helpers/DiagnosticVerifier.cs +++ b/test/Microsoft.ML.CodeAnalyzer.Tests/Helpers/DiagnosticVerifier.cs @@ -12,6 +12,7 @@ using Microsoft.CodeAnalysis.CSharp; using Microsoft.CodeAnalysis.Diagnostics; using Microsoft.CodeAnalysis.Text; +using Microsoft.ML.StaticPipe; using Xunit; namespace Microsoft.ML.CodeAnalyzer.Tests.Helpers @@ -264,6 +265,7 @@ private static string FormatDiagnostics(DiagnosticAnalyzer analyzer, params Diag private static readonly MetadataReference MLNetCoreReference = RefFromType(); private static readonly MetadataReference MLNetDataReference = RefFromType(); + private static readonly MetadataReference MLNetStaticPipeReference = RefFromType(); protected static MetadataReference RefFromType() => MetadataReference.CreateFromFile(typeof(TType).Assembly.Location); @@ -390,7 +392,8 @@ internal static Project CreateProject(string projectName, ref Solution solution, .AddMetadataReference(projectId, CSharpSymbolsReference) .AddMetadataReference(projectId, CodeAnalysisReference) .AddMetadataReference(projectId, MLNetCoreReference) - .AddMetadataReference(projectId, MLNetDataReference); + .AddMetadataReference(projectId, MLNetDataReference) + .AddMetadataReference(projectId, MLNetStaticPipeReference); int count = 0; foreach (string source in sources) diff --git a/test/Microsoft.ML.CodeAnalyzer.Tests/Microsoft.ML.CodeAnalyzer.Tests.csproj 
b/test/Microsoft.ML.CodeAnalyzer.Tests/Microsoft.ML.CodeAnalyzer.Tests.csproj index 7851f01ecb..06e43b098d 100644 --- a/test/Microsoft.ML.CodeAnalyzer.Tests/Microsoft.ML.CodeAnalyzer.Tests.csproj +++ b/test/Microsoft.ML.CodeAnalyzer.Tests/Microsoft.ML.CodeAnalyzer.Tests.csproj @@ -26,6 +26,7 @@ + diff --git a/test/Microsoft.ML.CodeAnalyzer.Tests/Resources/TypeIsSchemaShapeClassResource.cs b/test/Microsoft.ML.CodeAnalyzer.Tests/Resources/TypeIsSchemaShapeClassResource.cs index 855dc747df..b060c5247b 100644 --- a/test/Microsoft.ML.CodeAnalyzer.Tests/Resources/TypeIsSchemaShapeClassResource.cs +++ b/test/Microsoft.ML.CodeAnalyzer.Tests/Resources/TypeIsSchemaShapeClassResource.cs @@ -10,7 +10,7 @@ class Foo public static void Bar() { IHostEnvironment env = null; - var text = TextLoader.CreateReader(env, ctx => new + var text = TextLoaderStatic.CreateReader(env, ctx => new { Label = ctx.LoadBool(0), Text = ctx.LoadText(1), @@ -40,7 +40,7 @@ public static void Bar() // This is wrong but should not fail with our diagnostic since there is a deeper problem that the class // simply is not there. - var text2 = TextLoader.CreateReader(env, ctx => new MissingClass(ctx.LoadText(0))); + var text2 = TextLoaderStatic.CreateReader(env, ctx => new MissingClass(ctx.LoadText(0))); } } diff --git a/test/Microsoft.ML.CodeAnalyzer.Tests/Resources/TypeIsSchemaShapeResource.cs b/test/Microsoft.ML.CodeAnalyzer.Tests/Resources/TypeIsSchemaShapeResource.cs index cf1aefd9ac..d7a32847df 100644 --- a/test/Microsoft.ML.CodeAnalyzer.Tests/Resources/TypeIsSchemaShapeResource.cs +++ b/test/Microsoft.ML.CodeAnalyzer.Tests/Resources/TypeIsSchemaShapeResource.cs @@ -11,7 +11,7 @@ class Foo public static void Bar() { IHostEnvironment env = null; - var text = TextLoader.CreateReader(env, ctx => ( + var text = TextLoaderStatic.CreateReader(env, ctx => ( label: ctx.LoadBool(0), text: ctx.LoadText(1), numericFeatures: ctx.LoadFloat(2, 5))); diff --git a/test/Microsoft.ML.CodeAnalyzer.Tests/Resources/TypeIsSchemaShapeResourceChained.cs b/test/Microsoft.ML.CodeAnalyzer.Tests/Resources/TypeIsSchemaShapeResourceChained.cs index 0c9f03eee0..d0e8db1039 100644 --- a/test/Microsoft.ML.CodeAnalyzer.Tests/Resources/TypeIsSchemaShapeResourceChained.cs +++ b/test/Microsoft.ML.CodeAnalyzer.Tests/Resources/TypeIsSchemaShapeResourceChained.cs @@ -11,34 +11,34 @@ class Foo { public static void Bar() { - DataReader Foo1(Func m) + DataReader Foo1(Func m) { IHostEnvironment env = null; // We ought to fail here. - return TextLoader.CreateReader(env, m); + return TextLoaderStatic.CreateReader(env, m); } - DataReader Foo2<[IsShape] T>(Func m) + DataReader Foo2<[IsShape] T>(Func m) { IHostEnvironment env = null; // We ought not to fail here due to that [IsShape], but calls to this method might fail. - return TextLoader.CreateReader(env, m); + return TextLoaderStatic.CreateReader(env, m); } - DataReader Foo3(Func m) + DataReader Foo3(Func m) where T : PipelineColumn { IHostEnvironment env = null; // This should work. - return TextLoader.CreateReader(env, m); + return TextLoaderStatic.CreateReader(env, m); } - DataReader Foo4(Func m) + DataReader Foo4(Func m) where T : IEnumerable { IHostEnvironment env = null; // This should not work. 
- return TextLoader.CreateReader(env, m); + return TextLoaderStatic.CreateReader(env, m); } void Scratch() diff --git a/test/Microsoft.ML.Core.Tests/Microsoft.ML.Core.Tests.csproj b/test/Microsoft.ML.Core.Tests/Microsoft.ML.Core.Tests.csproj index 8ff5e2dcfd..083e33ea2e 100644 --- a/test/Microsoft.ML.Core.Tests/Microsoft.ML.Core.Tests.csproj +++ b/test/Microsoft.ML.Core.Tests/Microsoft.ML.Core.Tests.csproj @@ -18,7 +18,6 @@ - diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs deleted file mode 100644 index 3911f05af3..0000000000 --- a/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs +++ /dev/null @@ -1,846 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using System.Collections.Generic; -using Microsoft.ML.Data; -using Microsoft.ML.EntryPoints; -using Microsoft.ML.Legacy.Data; -using Microsoft.ML.TestFramework; -using Xunit; -using Xunit.Abstractions; - -namespace Microsoft.ML.RunTests -{ -#pragma warning disable 612, 618 - public class TestCSharpApi : BaseTestClass - { - public TestCSharpApi(ITestOutputHelper output) : base(output) - { - } - - [Fact] - public void TestSimpleExperiment() - { - var dataPath = GetDataPath("adult.tiny.with-schema.txt"); - var env = new MLContext(); - var experiment = env.CreateExperiment(); - - var importInput = new Legacy.Data.TextLoader(dataPath); - var importOutput = experiment.Add(importInput); - - var normalizeInput = new Legacy.Transforms.MinMaxNormalizer - { - Data = importOutput.Data - }; - normalizeInput.AddColumn("NumericFeatures"); - var normalizeOutput = experiment.Add(normalizeInput); - - experiment.Compile(); - experiment.SetInput(importInput.InputFile, new SimpleFileHandle(env, dataPath, false, false)); - experiment.Run(); - var data = experiment.GetOutput(normalizeOutput.OutputData); - - var schema = data.Schema; - Assert.Equal(5, schema.Count); - var expected = new[] { "Label", "Workclass", "Categories", "NumericFeatures", "NumericFeatures" }; - for (int i = 0; i < schema.Count; i++) - Assert.Equal(expected[i], schema[i].Name); - } - - [Fact] - public void TestSimpleTrainExperiment() - { - var dataPath = GetDataPath("adult.tiny.with-schema.txt"); - var env = new MLContext(); - var experiment = env.CreateExperiment(); - - var importInput = new Legacy.Data.TextLoader(dataPath); - var importOutput = experiment.Add(importInput); - - var catInput = new Legacy.Transforms.CategoricalOneHotVectorizer - { - Data = importOutput.Data - }; - catInput.AddColumn("Categories"); - var catOutput = experiment.Add(catInput); - - var concatInput = new Legacy.Transforms.ColumnConcatenator - { - Data = catOutput.OutputData - }; - concatInput.AddColumn("Features", "Categories", "NumericFeatures"); - var concatOutput = experiment.Add(concatInput); - - var sdcaInput = new Legacy.Trainers.StochasticDualCoordinateAscentBinaryClassifier - { - TrainingData = concatOutput.OutputData, - LossFunction = new HingeLossSDCAClassificationLossFunction() { Margin = 1.1f }, - NumThreads = 1, - Shuffle = false - }; - var sdcaOutput = experiment.Add(sdcaInput); - - var scoreInput = new Legacy.Transforms.DatasetScorer - { - Data = concatOutput.OutputData, - PredictorModel = sdcaOutput.PredictorModel - }; - var scoreOutput = experiment.Add(scoreInput); - - var evalInput = new Legacy.Models.BinaryClassificationEvaluator - { 
- Data = scoreOutput.ScoredData - }; - var evalOutput = experiment.Add(evalInput); - - experiment.Compile(); - experiment.SetInput(importInput.InputFile, new SimpleFileHandle(env, dataPath, false, false)); - experiment.Run(); - var data = experiment.GetOutput(evalOutput.OverallMetrics); - - var schema = data.Schema; - var b = schema.TryGetColumnIndex("AUC", out int aucCol); - Assert.True(b); - using (var cursor = data.GetRowCursor(col => col == aucCol)) - { - var getter = cursor.GetGetter(aucCol); - b = cursor.MoveNext(); - Assert.True(b); - double auc = 0; - getter(ref auc); - Assert.Equal(0.93, auc, 2); - b = cursor.MoveNext(); - Assert.False(b); - } - } - - [ConditionalFact(typeof(BaseTestBaseline), nameof(BaseTestBaseline.LessThanNetCore30OrNotNetCore))] // netcore3.0 output differs from Baseline - public void TestCrossValidationMacro() - { - var dataPath = GetDataPath(TestDatasets.generatedRegressionDatasetmacro.trainFilename); - var env = new MLContext(42); - var subGraph = env.CreateExperiment(); - - var nop = new Legacy.Transforms.NoOperation(); - var nopOutput = subGraph.Add(nop); - - var generate = new Legacy.Transforms.RandomNumberGenerator(); - generate.Column = new[] { new Legacy.Transforms.GenerateNumberTransformColumn() { Name = "Weight1" } }; - generate.Data = nopOutput.OutputData; - var generateOutput = subGraph.Add(generate); - - var learnerInput = new Legacy.Trainers.PoissonRegressor - { - TrainingData = generateOutput.OutputData, - NumThreads = 1, - WeightColumn = "Weight1" - }; - var learnerOutput = subGraph.Add(learnerInput); - - var modelCombine = new Legacy.Transforms.ManyHeterogeneousModelCombiner - { - TransformModels = new ArrayVar(nopOutput.Model, generateOutput.Model), - PredictorModel = learnerOutput.PredictorModel - }; - var modelCombineOutput = subGraph.Add(modelCombine); - - var experiment = env.CreateExperiment(); - var importInput = new Legacy.Data.TextLoader(dataPath) - { - Arguments = new Legacy.Data.TextLoaderArguments - { - Separator = new[] { ';' }, - HasHeader = true, - Column = new[] - { - new TextLoaderColumn() - { - Name = "Label", - Source = new [] { new TextLoaderRange(11) }, - Type = Legacy.Data.DataKind.Num - }, - - new TextLoaderColumn() - { - Name = "Features", - Source = new [] { new TextLoaderRange(0,10) }, - Type = Legacy.Data.DataKind.Num - } - } - } - }; - var importOutput = experiment.Add(importInput); - - var crossValidate = new Legacy.Models.CrossValidator - { - Data = importOutput.Data, - Nodes = subGraph, - Kind = Legacy.Models.MacroUtilsTrainerKinds.SignatureRegressorTrainer, - TransformModel = null, - WeightColumn = "Weight1" - }; - crossValidate.Inputs.Data = nop.Data; - crossValidate.Outputs.PredictorModel = modelCombineOutput.PredictorModel; - var crossValidateOutput = experiment.Add(crossValidate); - - experiment.Compile(); - importInput.SetInput(env, experiment); - experiment.Run(); - var data = experiment.GetOutput(crossValidateOutput.OverallMetrics); - - var schema = data.Schema; - var b = schema.TryGetColumnIndex("L1(avg)", out int metricCol); - Assert.True(b); - b = schema.TryGetColumnIndex("Fold Index", out int foldCol); - Assert.True(b); - b = schema.TryGetColumnIndex("IsWeighted", out int isWeightedCol); - using (var cursor = data.GetRowCursor(col => col == metricCol || col == foldCol || col == isWeightedCol)) - { - var getter = cursor.GetGetter(metricCol); - var foldGetter = cursor.GetGetter>(foldCol); - ReadOnlyMemory fold = default; - var isWeightedGetter = cursor.GetGetter(isWeightedCol); - bool isWeighted = 
default; - double avg = 0; - double weightedAvg = 0; - for (int w = 0; w < 2; w++) - { - // Get the average. - b = cursor.MoveNext(); - Assert.True(b); - if (w == 1) - getter(ref weightedAvg); - else - getter(ref avg); - foldGetter(ref fold); - Assert.True(ReadOnlyMemoryUtils.EqualsStr("Average", fold)); - isWeightedGetter(ref isWeighted); - Assert.True(isWeighted == (w == 1)); - - // Get the standard deviation. - b = cursor.MoveNext(); - Assert.True(b); - double stdev = 0; - getter(ref stdev); - foldGetter(ref fold); - Assert.True(ReadOnlyMemoryUtils.EqualsStr("Standard Deviation", fold)); - if (w == 1) - Assert.Equal(1.585, stdev, 3); - else - Assert.Equal(1.39, stdev, 2); - isWeightedGetter(ref isWeighted); - Assert.True(isWeighted == (w == 1)); - } - double sum = 0; - double weightedSum = 0; - for (int f = 0; f < 2; f++) - { - for (int w = 0; w < 2; w++) - { - b = cursor.MoveNext(); - Assert.True(b); - double val = 0; - getter(ref val); - foldGetter(ref fold); - if (w == 1) - weightedSum += val; - else - sum += val; - Assert.True(ReadOnlyMemoryUtils.EqualsStr("Fold " + f, fold)); - isWeightedGetter(ref isWeighted); - Assert.True(isWeighted == (w == 1)); - } - } - Assert.Equal(weightedAvg, weightedSum / 2); - Assert.Equal(avg, sum / 2); - b = cursor.MoveNext(); - Assert.False(b); - } - } - - [Fact] - public void TestCrossValidationMacroWithMultiClass() - { - var dataPath = GetDataPath(@"Train-Tiny-28x28.txt"); - var env = new MLContext(42); - var subGraph = env.CreateExperiment(); - - var nop = new Legacy.Transforms.NoOperation(); - var nopOutput = subGraph.Add(nop); - - var learnerInput = new Legacy.Trainers.StochasticDualCoordinateAscentClassifier - { - TrainingData = nopOutput.OutputData, - NumThreads = 1 - }; - var learnerOutput = subGraph.Add(learnerInput); - - var modelCombine = new Legacy.Transforms.ManyHeterogeneousModelCombiner - { - TransformModels = new ArrayVar(nopOutput.Model), - PredictorModel = learnerOutput.PredictorModel - }; - var modelCombineOutput = subGraph.Add(modelCombine); - - var experiment = env.CreateExperiment(); - var importInput = new Legacy.Data.TextLoader(dataPath); - var importOutput = experiment.Add(importInput); - - var crossValidate = new Legacy.Models.CrossValidator - { - Data = importOutput.Data, - Nodes = subGraph, - Kind = Legacy.Models.MacroUtilsTrainerKinds.SignatureMultiClassClassifierTrainer, - TransformModel = null - }; - crossValidate.Inputs.Data = nop.Data; - crossValidate.Outputs.PredictorModel = modelCombineOutput.PredictorModel; - var crossValidateOutput = experiment.Add(crossValidate); - - experiment.Compile(); - importInput.SetInput(env, experiment); - experiment.Run(); - var data = experiment.GetOutput(crossValidateOutput.OverallMetrics); - - var schema = data.Schema; - var b = schema.TryGetColumnIndex("Accuracy(micro-avg)", out int metricCol); - Assert.True(b); - b = schema.TryGetColumnIndex("Fold Index", out int foldCol); - Assert.True(b); - using (var cursor = data.GetRowCursor(col => col == metricCol || col == foldCol)) - { - var getter = cursor.GetGetter(metricCol); - var foldGetter = cursor.GetGetter>(foldCol); - ReadOnlyMemory fold = default; - - // Get the average. - b = cursor.MoveNext(); - Assert.True(b); - double avg = 0; - getter(ref avg); - foldGetter(ref fold); - Assert.True(ReadOnlyMemoryUtils.EqualsStr("Average", fold)); - - // Get the standard deviation. 
- b = cursor.MoveNext(); - Assert.True(b); - double stdev = 0; - getter(ref stdev); - foldGetter(ref fold); - Assert.True(ReadOnlyMemoryUtils.EqualsStr("Standard Deviation", fold)); - Assert.Equal(0.015, stdev, 3); - - double sum = 0; - double val = 0; - for (int f = 0; f < 2; f++) - { - b = cursor.MoveNext(); - Assert.True(b); - getter(ref val); - foldGetter(ref fold); - sum += val; - Assert.True(ReadOnlyMemoryUtils.EqualsStr("Fold " + f, fold)); - } - Assert.Equal(avg, sum / 2); - b = cursor.MoveNext(); - Assert.False(b); - } - - var confusion = experiment.GetOutput(crossValidateOutput.ConfusionMatrix); - schema = confusion.Schema; - b = schema.TryGetColumnIndex("Count", out int countCol); - Assert.True(b); - b = schema.TryGetColumnIndex("Fold Index", out foldCol); - Assert.True(b); - var type = schema[countCol].Metadata.Schema[MetadataUtils.Kinds.SlotNames].Type; - Assert.True(type is VectorType vecType && vecType.ItemType is TextType && vecType.Size == 10); - var slotNames = default(VBuffer>); - schema[countCol].GetSlotNames(ref slotNames); - var slotNameValues = slotNames.GetValues(); - for (int i = 0; i < slotNameValues.Length; i++) - { - Assert.True(ReadOnlyMemoryUtils.EqualsStr(i.ToString(), slotNameValues[i])); - } - using (var curs = confusion.GetRowCursor(col => true)) - { - var countGetter = curs.GetGetter>(countCol); - var foldGetter = curs.GetGetter>(foldCol); - var confCount = default(VBuffer); - var foldIndex = default(ReadOnlyMemory); - int rowCount = 0; - var foldCur = "Fold 0"; - while (curs.MoveNext()) - { - countGetter(ref confCount); - foldGetter(ref foldIndex); - rowCount++; - Assert.True(ReadOnlyMemoryUtils.EqualsStr(foldCur, foldIndex)); - if (rowCount == 10) - { - rowCount = 0; - foldCur = "Fold 1"; - } - } - Assert.Equal(0, rowCount); - } - - var warnings = experiment.GetOutput(crossValidateOutput.Warnings); - using (var cursor = warnings.GetRowCursor(col => true)) - Assert.False(cursor.MoveNext()); - } - - [Fact] - public void TestCrossValidationMacroMultiClassWithWarnings() - { - var dataPath = GetDataPath(@"Train-Tiny-28x28.txt"); - var env = new MLContext(42); - var subGraph = env.CreateExperiment(); - - var nop = new Legacy.Transforms.NoOperation(); - var nopOutput = subGraph.Add(nop); - - var learnerInput = new Legacy.Trainers.LogisticRegressionClassifier - { - TrainingData = nopOutput.OutputData, - NumThreads = 1 - }; - var learnerOutput = subGraph.Add(learnerInput); - - var experiment = env.CreateExperiment(); - var importInput = new Legacy.Data.TextLoader(dataPath); - var importOutput = experiment.Add(importInput); - - var filter = new Legacy.Transforms.RowRangeFilter(); - filter.Data = importOutput.Data; - filter.Column = "Label"; - filter.Min = 0; - filter.Max = 5; - var filterOutput = experiment.Add(filter); - - var term = new Legacy.Transforms.TextToKeyConverter(); - term.Column = new[] - { - new Legacy.Transforms.ValueToKeyMappingTransformerColumn() - { - Source = "Label", Name = "Strat", Sort = Legacy.Transforms.ValueToKeyMappingTransformerSortOrder.Value - } - }; - term.Data = filterOutput.OutputData; - var termOutput = experiment.Add(term); - - var crossValidate = new Legacy.Models.CrossValidator - { - Data = termOutput.OutputData, - Nodes = subGraph, - Kind = Legacy.Models.MacroUtilsTrainerKinds.SignatureMultiClassClassifierTrainer, - TransformModel = null, - StratificationColumn = "Strat" - }; - crossValidate.Inputs.Data = nop.Data; - crossValidate.Outputs.PredictorModel = learnerOutput.PredictorModel; - var crossValidateOutput = 
experiment.Add(crossValidate); - - experiment.Compile(); - importInput.SetInput(env, experiment); - experiment.Run(); - var warnings = experiment.GetOutput(crossValidateOutput.Warnings); - - var schema = warnings.Schema; - var b = schema.TryGetColumnIndex("WarningText", out int warningCol); - Assert.True(b); - using (var cursor = warnings.GetRowCursor(col => col == warningCol)) - { - var getter = cursor.GetGetter>(warningCol); - - b = cursor.MoveNext(); - Assert.True(b); - var warning = default(ReadOnlyMemory); - getter(ref warning); - Assert.Contains("test instances with class values not seen in the training set.", warning.ToString()); - b = cursor.MoveNext(); - Assert.True(b); - getter(ref warning); - Assert.Contains("Detected columns of variable length: SortedScores, SortedClasses", warning.ToString()); - b = cursor.MoveNext(); - Assert.False(b); - } - } - - [Fact] - public void TestCrossValidationMacroWithStratification() - { - var dataPath = GetDataPath(@"breast-cancer.txt"); - var env = new MLContext(42); - var subGraph = env.CreateExperiment(); - - var nop = new Legacy.Transforms.NoOperation(); - var nopOutput = subGraph.Add(nop); - - var learnerInput = new Legacy.Trainers.StochasticDualCoordinateAscentBinaryClassifier - { - TrainingData = nopOutput.OutputData, - NumThreads = 1 - }; - var learnerOutput = subGraph.Add(learnerInput); - - var modelCombine = new Legacy.Transforms.ManyHeterogeneousModelCombiner - { - TransformModels = new ArrayVar(nopOutput.Model), - PredictorModel = learnerOutput.PredictorModel - }; - var modelCombineOutput = subGraph.Add(modelCombine); - - var experiment = env.CreateExperiment(); - var importInput = new Legacy.Data.TextLoader(dataPath); - importInput.Arguments.Column = new Legacy.Data.TextLoaderColumn[] - { - new Legacy.Data.TextLoaderColumn { Name = "Label", Source = new[] { new Legacy.Data.TextLoaderRange(0) } }, - new Legacy.Data.TextLoaderColumn { Name = "Strat", Source = new[] { new Legacy.Data.TextLoaderRange(1) } }, - new Legacy.Data.TextLoaderColumn { Name = "Features", Source = new[] { new Legacy.Data.TextLoaderRange(2, 9) } } - }; - var importOutput = experiment.Add(importInput); - - var crossValidate = new Legacy.Models.CrossValidator - { - Data = importOutput.Data, - Nodes = subGraph, - TransformModel = null, - StratificationColumn = "Strat" - }; - crossValidate.Inputs.Data = nop.Data; - crossValidate.Outputs.PredictorModel = modelCombineOutput.PredictorModel; - var crossValidateOutput = experiment.Add(crossValidate); - experiment.Compile(); - experiment.SetInput(importInput.InputFile, new SimpleFileHandle(env, dataPath, false, false)); - experiment.Run(); - var data = experiment.GetOutput(crossValidateOutput.OverallMetrics); - - var schema = data.Schema; - var b = schema.TryGetColumnIndex("AUC", out int metricCol); - Assert.True(b); - b = schema.TryGetColumnIndex("Fold Index", out int foldCol); - Assert.True(b); - using (var cursor = data.GetRowCursor(col => col == metricCol || col == foldCol)) - { - var getter = cursor.GetGetter(metricCol); - var foldGetter = cursor.GetGetter>(foldCol); - ReadOnlyMemory fold = default; - - // Get the verage. - b = cursor.MoveNext(); - Assert.True(b); - double avg = 0; - getter(ref avg); - foldGetter(ref fold); - Assert.True(ReadOnlyMemoryUtils.EqualsStr("Average", fold)); - - // Get the standard deviation. 
- b = cursor.MoveNext(); - Assert.True(b); - double stdev = 0; - getter(ref stdev); - foldGetter(ref fold); - Assert.True(ReadOnlyMemoryUtils.EqualsStr("Standard Deviation", fold)); - Assert.Equal(0.00488, stdev, 5); - - double sum = 0; - double val = 0; - for (int f = 0; f < 2; f++) - { - b = cursor.MoveNext(); - Assert.True(b); - getter(ref val); - foldGetter(ref fold); - sum += val; - Assert.True(ReadOnlyMemoryUtils.EqualsStr("Fold " + f, fold)); - } - Assert.Equal(avg, sum / 2); - b = cursor.MoveNext(); - Assert.False(b); - } - } - - [Fact] - public void TestCrossValidationMacroWithNonDefaultNames() - { - string dataPath = GetDataPath(@"adult.tiny.with-schema.txt"); - var env = new MLContext(42); - var subGraph = env.CreateExperiment(); - - var textToKey = new Legacy.Transforms.TextToKeyConverter(); - textToKey.Column = new[] { new Legacy.Transforms.ValueToKeyMappingTransformerColumn() { Name = "Label1", Source = "Label" } }; - var textToKeyOutput = subGraph.Add(textToKey); - - var hash = new Legacy.Transforms.HashConverter(); - hash.Column = new[] { new Legacy.Transforms.HashJoiningTransformColumn() { Name = "GroupId1", Source = "Workclass" } }; - hash.Data = textToKeyOutput.OutputData; - var hashOutput = subGraph.Add(hash); - - var learnerInput = new Legacy.Trainers.FastTreeRanker - { - TrainingData = hashOutput.OutputData, - NumThreads = 1, - LabelColumn = "Label1", - GroupIdColumn = "GroupId1" - }; - var learnerOutput = subGraph.Add(learnerInput); - - var modelCombine = new Legacy.Transforms.ManyHeterogeneousModelCombiner - { - TransformModels = new ArrayVar(textToKeyOutput.Model, hashOutput.Model), - PredictorModel = learnerOutput.PredictorModel - }; - var modelCombineOutput = subGraph.Add(modelCombine); - - var experiment = env.CreateExperiment(); - var importInput = new Legacy.Data.TextLoader(dataPath); - importInput.Arguments.HasHeader = true; - importInput.Arguments.Column = new TextLoaderColumn[] - { - new TextLoaderColumn { Name = "Label", Source = new[] { new TextLoaderRange(0) } }, - new TextLoaderColumn { Name = "Workclass", Source = new[] { new TextLoaderRange(1) }, Type = Legacy.Data.DataKind.Text }, - new TextLoaderColumn { Name = "Features", Source = new[] { new TextLoaderRange(9, 14) } } - }; - var importOutput = experiment.Add(importInput); - - var crossValidate = new Legacy.Models.CrossValidator - { - Data = importOutput.Data, - Nodes = subGraph, - TransformModel = null, - LabelColumn = "Label1", - GroupColumn = "GroupId1", - NameColumn = "Workclass", - Kind = Legacy.Models.MacroUtilsTrainerKinds.SignatureRankerTrainer - }; - crossValidate.Inputs.Data = textToKey.Data; - crossValidate.Outputs.PredictorModel = modelCombineOutput.PredictorModel; - var crossValidateOutput = experiment.Add(crossValidate); - experiment.Compile(); - experiment.SetInput(importInput.InputFile, new SimpleFileHandle(env, dataPath, false, false)); - experiment.Run(); - var data = experiment.GetOutput(crossValidateOutput.OverallMetrics); - - var schema = data.Schema; - var b = schema.TryGetColumnIndex("NDCG", out int metricCol); - Assert.True(b); - b = schema.TryGetColumnIndex("Fold Index", out int foldCol); - Assert.True(b); - using (var cursor = data.GetRowCursor(col => col == metricCol || col == foldCol)) - { - var getter = cursor.GetGetter>(metricCol); - var foldGetter = cursor.GetGetter>(foldCol); - ReadOnlyMemory fold = default; - - // Get the verage. 
- b = cursor.MoveNext(); - Assert.True(b); - var avg = default(VBuffer); - getter(ref avg); - foldGetter(ref fold); - Assert.True(ReadOnlyMemoryUtils.EqualsStr("Average", fold)); - - // Get the standard deviation. - b = cursor.MoveNext(); - Assert.True(b); - var stdev = default(VBuffer); - getter(ref stdev); - foldGetter(ref fold); - Assert.True(ReadOnlyMemoryUtils.EqualsStr("Standard Deviation", fold)); - var stdevValues = stdev.GetValues(); - Assert.Equal(2.462, stdevValues[0], 3); - Assert.Equal(2.763, stdevValues[1], 3); - Assert.Equal(3.273, stdevValues[2], 3); - - var sumBldr = new BufferBuilder(R8Adder.Instance); - sumBldr.Reset(avg.Length, true); - var val = default(VBuffer); - for (int f = 0; f < 2; f++) - { - b = cursor.MoveNext(); - Assert.True(b); - getter(ref val); - foldGetter(ref fold); - sumBldr.AddFeatures(0, in val); - Assert.True(ReadOnlyMemoryUtils.EqualsStr("Fold " + f, fold)); - } - var sum = default(VBuffer); - sumBldr.GetResult(ref sum); - - var avgValues = avg.GetValues(); - var sumValues = sum.GetValues(); - for (int i = 0; i < avgValues.Length; i++) - Assert.Equal(avgValues[i], sumValues[i] / 2); - b = cursor.MoveNext(); - Assert.False(b); - } - - data = experiment.GetOutput(crossValidateOutput.PerInstanceMetrics); - Assert.True(data.Schema.TryGetColumnIndex("Instance", out int nameCol)); - using (var cursor = data.GetRowCursor(col => col == nameCol)) - { - var getter = cursor.GetGetter>(nameCol); - while (cursor.MoveNext()) - { - ReadOnlyMemory name = default; - getter(ref name); - Assert.Subset(new HashSet() { "Private", "?", "Federal-gov" }, new HashSet() { name.ToString() }); - if (cursor.Position > 4) - break; - } - } - } - - [Fact] - public void TestOvaMacro() - { - var dataPath = GetDataPath(@"iris.txt"); - var env = new MLContext(42); - // Specify subgraph for OVA - var subGraph = env.CreateExperiment(); - var learnerInput = new Legacy.Trainers.StochasticDualCoordinateAscentBinaryClassifier { NumThreads = 1 }; - var learnerOutput = subGraph.Add(learnerInput); - // Create pipeline with OVA and multiclass scoring. 
- var experiment = env.CreateExperiment(); - var importInput = new Legacy.Data.TextLoader(dataPath); - importInput.Arguments.Column = new TextLoaderColumn[] - { - new TextLoaderColumn { Name = "Label", Source = new[] { new TextLoaderRange(0) } }, - new TextLoaderColumn { Name = "Features", Source = new[] { new TextLoaderRange(1,4) } } - }; - var importOutput = experiment.Add(importInput); - var oneVersusAll = new Legacy.Models.OneVersusAll - { - TrainingData = importOutput.Data, - Nodes = subGraph, - UseProbabilities = true, - }; - var ovaOutput = experiment.Add(oneVersusAll); - var scoreInput = new Legacy.Transforms.DatasetScorer - { - Data = importOutput.Data, - PredictorModel = ovaOutput.PredictorModel - }; - var scoreOutput = experiment.Add(scoreInput); - var evalInput = new Legacy.Models.ClassificationEvaluator - { - Data = scoreOutput.ScoredData - }; - var evalOutput = experiment.Add(evalInput); - experiment.Compile(); - experiment.SetInput(importInput.InputFile, new SimpleFileHandle(env, dataPath, false, false)); - experiment.Run(); - - var data = experiment.GetOutput(evalOutput.OverallMetrics); - var schema = data.Schema; - var b = schema.TryGetColumnIndex(MultiClassClassifierEvaluator.AccuracyMacro, out int accCol); - Assert.True(b); - using (var cursor = data.GetRowCursor(col => col == accCol)) - { - var getter = cursor.GetGetter(accCol); - b = cursor.MoveNext(); - Assert.True(b); - double acc = 0; - getter(ref acc); - Assert.Equal(0.96, acc, 2); - b = cursor.MoveNext(); - Assert.False(b); - } - } - - [Fact] - public void TestOvaMacroWithUncalibratedLearner() - { - var dataPath = GetDataPath(@"iris.txt"); - var env = new MLContext(42); - // Specify subgraph for OVA - var subGraph = env.CreateExperiment(); - var learnerInput = new Legacy.Trainers.AveragedPerceptronBinaryClassifier { Shuffle = false }; - var learnerOutput = subGraph.Add(learnerInput); - // Create pipeline with OVA and multiclass scoring. 
- var experiment = env.CreateExperiment(); - var importInput = new Legacy.Data.TextLoader(dataPath); - importInput.Arguments.Column = new TextLoaderColumn[] - { - new TextLoaderColumn { Name = "Label", Source = new[] { new TextLoaderRange(0) } }, - new TextLoaderColumn { Name = "Features", Source = new[] { new TextLoaderRange(1,4) } } - }; - var importOutput = experiment.Add(importInput); - var oneVersusAll = new Legacy.Models.OneVersusAll - { - TrainingData = importOutput.Data, - Nodes = subGraph, - UseProbabilities = true, - }; - var ovaOutput = experiment.Add(oneVersusAll); - var scoreInput = new Legacy.Transforms.DatasetScorer - { - Data = importOutput.Data, - PredictorModel = ovaOutput.PredictorModel - }; - var scoreOutput = experiment.Add(scoreInput); - var evalInput = new Legacy.Models.ClassificationEvaluator - { - Data = scoreOutput.ScoredData - }; - var evalOutput = experiment.Add(evalInput); - experiment.Compile(); - experiment.SetInput(importInput.InputFile, new SimpleFileHandle(env, dataPath, false, false)); - experiment.Run(); - - var data = experiment.GetOutput(evalOutput.OverallMetrics); - var schema = data.Schema; - var b = schema.TryGetColumnIndex(MultiClassClassifierEvaluator.AccuracyMacro, out int accCol); - Assert.True(b); - using (var cursor = data.GetRowCursor(col => col == accCol)) - { - var getter = cursor.GetGetter(accCol); - b = cursor.MoveNext(); - Assert.True(b); - double acc = 0; - getter(ref acc); - Assert.Equal(0.71, acc, 2); - b = cursor.MoveNext(); - Assert.False(b); - } - } - - [ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // TensorFlow is 64-bit only - public void TestTensorFlowEntryPoint() - { - var dataPath = GetDataPath("Train-Tiny-28x28.txt"); - var env = new MLContext(42); - var experiment = env.CreateExperiment(); - - var importInput = new Legacy.Data.TextLoader(dataPath); - importInput.Arguments.Column = new TextLoaderColumn[] - { - new TextLoaderColumn { Name = "Label", Source = new[] { new TextLoaderRange(0) } }, - new TextLoaderColumn { Name = "Placeholder", Source = new[] { new TextLoaderRange(1, 784) } } - }; - var importOutput = experiment.Add(importInput); - - var tfTransformInput = new Legacy.Transforms.TensorFlowScorer - { - Data = importOutput.Data, - ModelLocation = "mnist_model/frozen_saved_model.pb", - InputColumns = new[] { "Placeholder" }, - OutputColumns = new[] { "Softmax" }, - }; - var tfTransformOutput = experiment.Add(tfTransformInput); - - experiment.Compile(); - experiment.SetInput(importInput.InputFile, new SimpleFileHandle(env, dataPath, false, false)); - experiment.Run(); - var data = experiment.GetOutput(tfTransformOutput.OutputData); - - var schema = data.Schema; - Assert.Equal(3, schema.Count); - Assert.Equal("Softmax", schema[2].Name); - Assert.Equal(10, (schema[2].Type as VectorType)?.Size); - } - } -#pragma warning restore 612, 618 -} diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs index a2fc9c0107..b7c883d53b 100644 --- a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs @@ -39,7 +39,6 @@ namespace Microsoft.ML.RunTests { -#pragma warning disable 612 public partial class TestEntryPoints : CoreBaseTestClass { public TestEntryPoints(ITestOutputHelper output) : base(output) @@ -51,7 +50,7 @@ private IDataView GetBreastCancerDataView() { var dataPath = GetDataPath("breast-cancer.txt"); var inputFile = new SimpleFileHandle(Env, dataPath, 
false, false); - return Legacy.EntryPoints.ImportTextData.TextLoader(Env, new Legacy.EntryPoints.ImportTextData.LoaderInput() + return EntryPoints.ImportTextData.TextLoader(Env, new EntryPoints.ImportTextData.LoaderInput() { Arguments = { @@ -71,7 +70,7 @@ private IDataView GetBreastCancerDataviewWithTextColumns() { var dataPath = GetDataPath("breast-cancer.txt"); var inputFile = new SimpleFileHandle(Env, dataPath, false, false); - return Legacy.EntryPoints.ImportTextData.TextLoader(Env, new Legacy.EntryPoints.ImportTextData.LoaderInput() + return EntryPoints.ImportTextData.TextLoader(Env, new EntryPoints.ImportTextData.LoaderInput() { Arguments = { @@ -985,7 +984,7 @@ public void EntryPointPipelineEnsembleText() { var dataPath = GetDataPath("lm.sample.txt"); var inputFile = new SimpleFileHandle(Env, dataPath, false, false); - var dataView = Legacy.EntryPoints.ImportTextData.TextLoader(Env, new Legacy.EntryPoints.ImportTextData.LoaderInput() + var dataView = EntryPoints.ImportTextData.TextLoader(Env, new EntryPoints.ImportTextData.LoaderInput() { Arguments = { @@ -1199,7 +1198,7 @@ public void EntryPointMulticlassPipelineEnsemble() { var dataPath = GetDataPath("iris.txt"); var inputFile = new SimpleFileHandle(Env, dataPath, false, false); - var dataView = Legacy.EntryPoints.ImportTextData.TextLoader(Env, new Legacy.EntryPoints.ImportTextData.LoaderInput() + var dataView = EntryPoints.ImportTextData.TextLoader(Env, new EntryPoints.ImportTextData.LoaderInput() { Arguments = { @@ -1346,8 +1345,8 @@ public void EntryPointPipelineEnsembleGetSummary() var dataPath = GetDataPath("breast-cancer-withheader.txt"); var inputFile = new SimpleFileHandle(Env, dataPath, false, false); var dataView = - Legacy.EntryPoints.ImportTextData.TextLoader(Env, - new Legacy.EntryPoints.ImportTextData.LoaderInput + EntryPoints.ImportTextData.TextLoader(Env, + new EntryPoints.ImportTextData.LoaderInput { InputFile = inputFile, Arguments = @@ -3357,7 +3356,7 @@ public void EntryPointLinearPredictorSummary() var dataPath = GetDataPath("breast-cancer-withheader.txt"); var inputFile = new SimpleFileHandle(Env, dataPath, false, false); - var dataView = Legacy.EntryPoints.ImportTextData.TextLoader(Env, new Legacy.EntryPoints.ImportTextData.LoaderInput() + var dataView = EntryPoints.ImportTextData.TextLoader(Env, new EntryPoints.ImportTextData.LoaderInput() { Arguments = { @@ -3431,7 +3430,7 @@ public void EntryPointPcaPredictorSummary() var dataPath = GetDataPath("MNIST.Train.0-class.tiny.txt"); using (var inputFile = new SimpleFileHandle(Env, dataPath, false, false)) { - var dataView = Legacy.EntryPoints.ImportTextData.TextLoader(Env, new Legacy.EntryPoints.ImportTextData.LoaderInput() + var dataView = EntryPoints.ImportTextData.TextLoader(Env, new EntryPoints.ImportTextData.LoaderInput() { Arguments = { @@ -3638,7 +3637,7 @@ public void EntryPointWordEmbeddings() "The five boxing wizards jump quickly." 
}); var inputFile = new SimpleFileHandle(Env, dataFile, false, false); - var dataView = Legacy.EntryPoints.ImportTextData.TextLoader(Env, new Legacy.EntryPoints.ImportTextData.LoaderInput() + var dataView = EntryPoints.ImportTextData.TextLoader(Env, new EntryPoints.ImportTextData.LoaderInput() { Arguments = { @@ -3868,7 +3867,1793 @@ public void EntryPointHashJoinCountTable() }); } + [Fact] + public void TestSimpleExperiment() + { + var dataPath = GetDataPath("adult.tiny.with-schema.txt"); + string inputGraph = @"{ + 'Nodes': + [{ + 'Name': 'Data.TextLoader', + 'Inputs': { + 'InputFile': '$inputFile', + 'Arguments': { + 'UseThreads': true, + 'HeaderFile': null, + 'MaxRows': null, + 'AllowQuoting': true, + 'AllowSparse': true, + 'InputSize': null, + 'Separator': [ + '\t' + ], + 'Column': null, + 'TrimWhitespace': false, + 'HasHeader': false + } + }, + 'Outputs': { + 'Data': '$Var_642faec2bf064255bc9a2b1044e9d116' + } + }, { + 'Name': 'Transforms.MinMaxNormalizer', + 'Inputs': { + 'Column': [{ + 'FixZero': null, + 'MaxTrainingExamples': null, + 'Name': 'NumericFeatures', + 'Source': 'NumericFeatures' + } + ], + 'FixZero': true, + 'MaxTrainingExamples': 1000000000, + 'Data': '$Var_642faec2bf064255bc9a2b1044e9d116' + }, + 'Outputs': { + 'OutputData': '$outputData', + 'Model': '$Var_9673b095f98f4ebcb19e8eb75a7a12e9' + } + } + ] + }"; + JObject graph = JObject.Parse(inputGraph); + var runner = new GraphRunner(Env, graph[FieldNames.Nodes] as JArray); + var inputFile = new SimpleFileHandle(Env, dataPath, false, false); + runner.SetInput("inputFile", inputFile); + runner.RunAll(); + var data = runner.GetOutput("outputData"); + var schema = data.Schema; + Assert.Equal(5, schema.Count); + var expected = new[] { "Label", "Workclass", "Categories", "NumericFeatures", "NumericFeatures" }; + for (int i = 0; i < schema.Count; i++) + Assert.Equal(expected[i], schema[i].Name); + } + + [Fact] + public void TestSimpleTrainExperiment() + { + var dataPath = GetDataPath("adult.tiny.with-schema.txt"); + string inputGraph = @" + { + 'Nodes': + [{ + 'Name': 'Data.TextLoader', + 'Inputs': { + 'InputFile': '$inputFile', + 'Arguments': { + 'UseThreads': true, + 'HeaderFile': null, + 'MaxRows': null, + 'AllowQuoting': true, + 'AllowSparse': true, + 'InputSize': null, + 'Separator': [ + '\t' + ], + 'Column': null, + 'TrimWhitespace': false, + 'HasHeader': false + } + }, + 'Outputs': { + 'Data': '$Var_99eb21288359485f936577da8f2e1061' + } + }, { + 'Name': 'Transforms.CategoricalOneHotVectorizer', + 'Inputs': { + 'Column': [{ + 'OutputKind': null, + 'MaxNumTerms': null, + 'Term': null, + 'Sort': null, + 'TextKeyValues': null, + 'Name': 'Categories', + 'Source': 'Categories' + } + ], + 'OutputKind': 'Ind', + 'MaxNumTerms': 1000000, + 'Term': null, + 'Sort': 'Occurrence', + 'TextKeyValues': true, + 'Data': '$Var_99eb21288359485f936577da8f2e1061' + }, + 'Outputs': { + 'OutputData': '$Var_c9e14b64d1a44114853331e80f1bde57', + 'Model': '$Var_85534ab1fc57480899180be5bbf20b38' + } + }, { + 'Name': 'Transforms.ColumnConcatenator', + 'Inputs': { + 'Column': [{ + 'Name': 'Features', + 'Source': [ + 'Categories', + 'NumericFeatures' + ] + } + ], + 'Data': '$Var_c9e14b64d1a44114853331e80f1bde57' + }, + 'Outputs': { + 'OutputData': '$Var_51d3ddc9792d4c6eb975e600e87b8cbc', + 'Model': '$Var_e3888e65f822424ca92959e442827d48' + } + }, { + 'Name': 'Trainers.StochasticDualCoordinateAscentBinaryClassifier', + 'Inputs': { + 'LossFunction': { + 'Name': 'HingeLoss', + 'Settings': { + 'Margin': 1.1 + } + }, + 'PositiveInstanceWeight': 1.0, + 
'Calibrator': { + 'Name': 'PlattCalibrator', + 'Settings': {} + }, + 'MaxCalibrationExamples': 1000000, + 'L2Const': null, + 'L1Threshold': null, + 'NumThreads': 1, + 'ConvergenceTolerance': 0.1, + 'MaxIterations': null, + 'Shuffle': false, + 'CheckFrequency': null, + 'BiasLearningRate': 0.0, + 'LabelColumn': 'Label', + 'TrainingData': '$Var_51d3ddc9792d4c6eb975e600e87b8cbc', + 'FeatureColumn': 'Features', + 'NormalizeFeatures': 'Auto', + 'Caching': 'Auto' + }, + 'Outputs': { + 'PredictorModel': '$Var_e7e860bdbf1c4a628a2a0912673afd36' + } + }, { + 'Name': 'Transforms.DatasetScorer', + 'Inputs': { + 'Data': '$Var_51d3ddc9792d4c6eb975e600e87b8cbc', + 'PredictorModel': '$Var_e7e860bdbf1c4a628a2a0912673afd36', + 'Suffix': null + }, + 'Outputs': { + 'ScoredData': '$Var_be77f9c4e45c43b7a67984304c291bf5', + 'ScoringTransform': '$Var_826e5697e56a467a81331c5ef3eff37f' + } + }, { + 'Name': 'Models.BinaryClassificationEvaluator', + 'Inputs': { + 'ProbabilityColumn': null, + 'Threshold': 0.0, + 'UseRawScoreThreshold': true, + 'NumRocExamples': 100000, + 'MaxAucExamples': -1, + 'NumAuPrcExamples': 100000, + 'LabelColumn': null, + 'WeightColumn': null, + 'ScoreColumn': null, + 'StratColumn': null, + 'Data': '$Var_be77f9c4e45c43b7a67984304c291bf5', + 'NameColumn': 'Name' + }, + 'Outputs': { + 'ConfusionMatrix': '$Var_cd6d3485a95d4405b469ce65c124e04a', + 'Warnings': '$Var_94528ba8fca14eb48b7e3f712aced38a', + 'OverallMetrics': '$Var_2130b277d4e0485f9cc5162c176767fa', + 'PerInstanceMetrics': '$Var_991a2f8bed28442bb9bd0a0b9ff14e45' + } + } + ] + }"; + + JObject graph = JObject.Parse(inputGraph); + var runner = new GraphRunner(Env, graph[FieldNames.Nodes] as JArray); + var inputFile = new SimpleFileHandle(Env, dataPath, false, false); + runner.SetInput("inputFile", inputFile); + runner.RunAll(); + var data = runner.GetOutput("Var_2130b277d4e0485f9cc5162c176767fa"); + + var schema = data.Schema; + var b = schema.TryGetColumnIndex("AUC", out int aucCol); + Assert.True(b); + using (var cursor = data.GetRowCursor(col => col == aucCol)) + { + var getter = cursor.GetGetter(aucCol); + b = cursor.MoveNext(); + Assert.True(b); + double auc = 0; + getter(ref auc); + Assert.Equal(0.93, auc, 2); + b = cursor.MoveNext(); + Assert.False(b); + } + } + + [ConditionalFact(typeof(BaseTestBaseline), nameof(BaseTestBaseline.LessThanNetCore30OrNotNetCore))] // netcore3.0 output differs from Baseline + public void TestCrossValidationMacro() + { + var dataPath = GetDataPath(TestDatasets.generatedRegressionDatasetmacro.trainFilename); + string inputGraph = @" + { + 'Nodes': + [{ + 'Name': 'Data.TextLoader', + 'Inputs': { + 'InputFile': '$inputFile', + 'Arguments': { + 'UseThreads': true, + 'HeaderFile': null, + 'MaxRows': null, + 'AllowQuoting': true, + 'AllowSparse': true, + 'InputSize': null, + 'Separator': [ + ';' + ], + 'Column': [{ + 'Name': 'Label', + 'Type': 'R4', + 'Source': [{ + 'Min': 11, + 'Max': 11, + 'AutoEnd': false, + 'VariableEnd': false, + 'AllOther': false, + 'ForceVector': false + } + ], + 'KeyRange': null + }, { + 'Name': 'Features', + 'Type': 'R4', + 'Source': [{ + 'Min': 0, + 'Max': 10, + 'AutoEnd': false, + 'VariableEnd': false, + 'AllOther': false, + 'ForceVector': false + } + ], + 'KeyRange': null + } + ], + 'TrimWhitespace': false, + 'HasHeader': true + } + }, + 'Outputs': { + 'Data': '$Var_6b6d6b5b5f894374a98904481d876a6e' + } + }, { + 'Name': 'Models.CrossValidator', + 'Inputs': { + 'Data': '$Var_6b6d6b5b5f894374a98904481d876a6e', + 'TransformModel': null, + 'Nodes': [{ + 'Name': 'Transforms.NoOperation', 
+ 'Inputs': { + 'Data': '$Var_abda1d0923f64b56bd01dc42fb57db33' + }, + 'Outputs': { + 'OutputData': '$Var_65ecee1d96a84b9d9645f616b278e77e', + 'Model': '$Var_6807c0e8cb42452c8fc687545aabc43b' + } + }, { + 'Name': 'Transforms.RandomNumberGenerator', + 'Inputs': { + 'Column': [{ + 'Name': 'Weight1', + 'UseCounter': null, + 'Seed': null + } + ], + 'UseCounter': false, + 'Seed': 42, + 'Data': '$Var_65ecee1d96a84b9d9645f616b278e77e' + }, + 'Outputs': { + 'OutputData': '$Var_8b36a1e70c9f4504973140ad15eac72f', + 'Model': '$Var_94bac81ee2e448ba82e3a21e116c0f9c' + } + }, { + 'Name': 'Trainers.PoissonRegressor', + 'Inputs': { + 'L2Weight': 1.0, + 'L1Weight': 1.0, + 'OptTol': 1E-07, + 'MemorySize': 20, + 'MaxIterations': 2147483647, + 'SgdInitializationTolerance': 0.0, + 'Quiet': false, + 'InitWtsDiameter': 0.0, + 'UseThreads': true, + 'NumThreads': 1, + 'DenseOptimizer': false, + 'EnforceNonNegativity': false, + 'WeightColumn': { + 'Value': 'Weight1', + 'IsExplicit': true + }, + 'LabelColumn': 'Label', + 'TrainingData': '$Var_8b36a1e70c9f4504973140ad15eac72f', + 'FeatureColumn': 'Features', + 'NormalizeFeatures': 'Auto', + 'Caching': 'Auto' + }, + 'Outputs': { + 'PredictorModel': '$Var_5763097adbdb40e3b161540cb0c88b91' + } + }, { + 'Name': 'Transforms.ManyHeterogeneousModelCombiner', + 'Inputs': { + 'TransformModels': [ + '$Var_6807c0e8cb42452c8fc687545aabc43b', + '$Var_94bac81ee2e448ba82e3a21e116c0f9c' + ], + 'PredictorModel': '$Var_5763097adbdb40e3b161540cb0c88b91' + }, + 'Outputs': { + 'PredictorModel': '$Var_77f3e99700ae453586513565171faf55' + } + } + ], + 'Inputs': { + 'Data': '$Var_abda1d0923f64b56bd01dc42fb57db33' + }, + 'Outputs': { + 'PredictorModel': '$Var_77f3e99700ae453586513565171faf55' + }, + 'StratificationColumn': null, + 'NumFolds': 2, + 'Kind': 'SignatureRegressorTrainer', + 'LabelColumn': 'Label', + 'WeightColumn': { + 'Value': 'Weight1', + 'IsExplicit': true + }, + 'GroupColumn': null, + 'NameColumn': null + }, + 'Outputs': { + 'PredictorModel': '$Var_5aaddf0cdc6d4d92b05a2804fcc3a2ee', + 'Warnings': '$Var_33701e91260c4a7184fd595ce392cb08', + 'OverallMetrics': '$overallMetrics', + 'PerInstanceMetrics': '$Var_89681d817cf543ecabbe6421bf37acb2', + 'ConfusionMatrix': '$Var_43fa86567b5f4e129e58bd12a575c06b' + } + } + ] + }"; + + JObject graph = JObject.Parse(inputGraph); + var runner = new GraphRunner(Env, graph[FieldNames.Nodes] as JArray); + var inputFile = new SimpleFileHandle(Env, dataPath, false, false); + runner.SetInput("inputFile", inputFile); + runner.RunAll(); + var data = runner.GetOutput("overallMetrics"); + + var schema = data.Schema; + var b = schema.TryGetColumnIndex("L1(avg)", out int metricCol); + Assert.True(b); + b = schema.TryGetColumnIndex("Fold Index", out int foldCol); + Assert.True(b); + b = schema.TryGetColumnIndex("IsWeighted", out int isWeightedCol); + using (var cursor = data.GetRowCursor(col => col == metricCol || col == foldCol || col == isWeightedCol)) + { + var getter = cursor.GetGetter(metricCol); + var foldGetter = cursor.GetGetter>(foldCol); + ReadOnlyMemory fold = default; + var isWeightedGetter = cursor.GetGetter(isWeightedCol); + bool isWeighted = default; + double avg = 0; + double weightedAvg = 0; + for (int w = 0; w < 2; w++) + { + // Get the average. 
+ b = cursor.MoveNext(); + Assert.True(b); + if (w == 1) + getter(ref weightedAvg); + else + getter(ref avg); + foldGetter(ref fold); + Assert.True(ReadOnlyMemoryUtils.EqualsStr("Average", fold)); + isWeightedGetter(ref isWeighted); + Assert.True(isWeighted == (w == 1)); + + // Get the standard deviation. + b = cursor.MoveNext(); + Assert.True(b); + double stdev = 0; + getter(ref stdev); + foldGetter(ref fold); + Assert.True(ReadOnlyMemoryUtils.EqualsStr("Standard Deviation", fold)); + if (w == 1) + Assert.Equal(1.585, stdev, 3); + else + Assert.Equal(1.39, stdev, 2); + isWeightedGetter(ref isWeighted); + Assert.True(isWeighted == (w == 1)); + } + double sum = 0; + double weightedSum = 0; + for (int f = 0; f < 2; f++) + { + for (int w = 0; w < 2; w++) + { + b = cursor.MoveNext(); + Assert.True(b); + double val = 0; + getter(ref val); + foldGetter(ref fold); + if (w == 1) + weightedSum += val; + else + sum += val; + Assert.True(ReadOnlyMemoryUtils.EqualsStr("Fold " + f, fold)); + isWeightedGetter(ref isWeighted); + Assert.True(isWeighted == (w == 1)); + } + } + Assert.Equal(weightedAvg, weightedSum / 2); + Assert.Equal(avg, sum / 2); + b = cursor.MoveNext(); + Assert.False(b); + } + } + + [Fact] + public void TestCrossValidationMacroWithMultiClass() + { + var dataPath = GetDataPath(@"Train-Tiny-28x28.txt"); + string inputGraph = @" + { + 'Nodes': + [{ + 'Name': 'Data.TextLoader', + 'Inputs': { + 'InputFile': '$inputFile', + 'Arguments': { + 'UseThreads': true, + 'HeaderFile': null, + 'MaxRows': null, + 'AllowQuoting': true, + 'AllowSparse': true, + 'InputSize': null, + 'Separator': [ + '\t' + ], + 'Column': null, + 'TrimWhitespace': false, + 'HasHeader': false + } + }, + 'Outputs': { + 'Data': '$Var_48530e4c7d0c4d0889ba9e6e80e6eb73' + } + }, { + 'Name': 'Models.CrossValidator', + 'Inputs': { + 'Data': '$Var_48530e4c7d0c4d0889ba9e6e80e6eb73', + 'TransformModel': null, + 'Nodes': [{ + 'Name': 'Transforms.NoOperation', + 'Inputs': { + 'Data': '$Var_1a2f44ae0aec4af4b4d1337d2cc733da' + }, + 'Outputs': { + 'OutputData': '$Var_a060169d8a924964b71447904c0d2ee9', + 'Model': '$Var_dbd0f197ee7145ce91ac26ef62936206' + } + }, { + 'Name': 'Trainers.StochasticDualCoordinateAscentClassifier', + 'Inputs': { + 'LossFunction': { + 'Name': 'LogLoss', + 'Settings': {} + }, + 'L2Const': null, + 'L1Threshold': null, + 'NumThreads': 1, + 'ConvergenceTolerance': 0.1, + 'MaxIterations': null, + 'Shuffle': true, + 'CheckFrequency': null, + 'BiasLearningRate': 0.0, + 'LabelColumn': 'Label', + 'TrainingData': '$Var_a060169d8a924964b71447904c0d2ee9', + 'FeatureColumn': 'Features', + 'NormalizeFeatures': 'Auto', + 'Caching': 'Auto' + }, + 'Outputs': { + 'PredictorModel': '$Var_0bb334380a514e0ab6b2215b0c049846' + } + }, { + 'Name': 'Transforms.ManyHeterogeneousModelCombiner', + 'Inputs': { + 'TransformModels': [ + '$Var_dbd0f197ee7145ce91ac26ef62936206' + ], + 'PredictorModel': '$Var_0bb334380a514e0ab6b2215b0c049846' + }, + 'Outputs': { + 'PredictorModel': '$Var_0b4526e0c7534eada0264802128c32c5' + } + } + ], + 'Inputs': { + 'Data': '$Var_1a2f44ae0aec4af4b4d1337d2cc733da' + }, + 'Outputs': { + 'PredictorModel': '$Var_0b4526e0c7534eada0264802128c32c5' + }, + 'StratificationColumn': null, + 'NumFolds': 2, + 'Kind': 'SignatureMultiClassClassifierTrainer', + 'LabelColumn': 'Label', + 'WeightColumn': null, + 'GroupColumn': null, + 'NameColumn': null + }, + 'Outputs': { + 'PredictorModel': '$Var_76decfcf71f5447d92869a4dd9200ea6', + 'Warnings': '$warnings', + 'OverallMetrics': '$overallMetrics', + 'PerInstanceMetrics': 
'$Var_74d0215056034d6c9a99f90485530b89', + 'ConfusionMatrix': '$confusionMatrix' + } + } + ] + } + "; + JObject graph = JObject.Parse(inputGraph); + var runner = new GraphRunner(Env, graph[FieldNames.Nodes] as JArray); + var inputFile = new SimpleFileHandle(Env, dataPath, false, false); + runner.SetInput("inputFile", inputFile); + runner.RunAll(); + var data = runner.GetOutput("overallMetrics"); + + var schema = data.Schema; + var b = schema.TryGetColumnIndex("Accuracy(micro-avg)", out int metricCol); + Assert.True(b); + b = schema.TryGetColumnIndex("Fold Index", out int foldCol); + Assert.True(b); + using (var cursor = data.GetRowCursor(col => col == metricCol || col == foldCol)) + { + var getter = cursor.GetGetter(metricCol); + var foldGetter = cursor.GetGetter>(foldCol); + ReadOnlyMemory fold = default; + + // Get the average. + b = cursor.MoveNext(); + Assert.True(b); + double avg = 0; + getter(ref avg); + foldGetter(ref fold); + Assert.True(ReadOnlyMemoryUtils.EqualsStr("Average", fold)); + + // Get the standard deviation. + b = cursor.MoveNext(); + Assert.True(b); + double stdev = 0; + getter(ref stdev); + foldGetter(ref fold); + Assert.True(ReadOnlyMemoryUtils.EqualsStr("Standard Deviation", fold)); + Assert.Equal(0.024809923969586353, stdev, 3); + + double sum = 0; + double val = 0; + for (int f = 0; f < 2; f++) + { + b = cursor.MoveNext(); + Assert.True(b); + getter(ref val); + foldGetter(ref fold); + sum += val; + Assert.True(ReadOnlyMemoryUtils.EqualsStr("Fold " + f, fold)); + } + Assert.Equal(avg, sum / 2); + b = cursor.MoveNext(); + Assert.False(b); + } + + var confusion = runner.GetOutput("confusionMatrix"); + schema = confusion.Schema; + b = schema.TryGetColumnIndex("Count", out int countCol); + Assert.True(b); + b = schema.TryGetColumnIndex("Fold Index", out foldCol); + Assert.True(b); + var type = schema[countCol].Metadata.Schema[MetadataUtils.Kinds.SlotNames].Type; + Assert.True(type is VectorType vecType && vecType.ItemType is TextType && vecType.Size == 10); + var slotNames = default(VBuffer>); + schema[countCol].GetSlotNames(ref slotNames); + var slotNameValues = slotNames.GetValues(); + for (int i = 0; i < slotNameValues.Length; i++) + { + Assert.True(ReadOnlyMemoryUtils.EqualsStr(i.ToString(), slotNameValues[i])); + } + using (var curs = confusion.GetRowCursor(col => true)) + { + var countGetter = curs.GetGetter>(countCol); + var foldGetter = curs.GetGetter>(foldCol); + var confCount = default(VBuffer); + var foldIndex = default(ReadOnlyMemory); + int rowCount = 0; + var foldCur = "Fold 0"; + while (curs.MoveNext()) + { + countGetter(ref confCount); + foldGetter(ref foldIndex); + rowCount++; + Assert.True(ReadOnlyMemoryUtils.EqualsStr(foldCur, foldIndex)); + if (rowCount == 10) + { + rowCount = 0; + foldCur = "Fold 1"; + } + } + Assert.Equal(0, rowCount); + } + + var warnings = runner.GetOutput("warnings"); + using (var cursor = warnings.GetRowCursor(col => true)) + Assert.False(cursor.MoveNext()); + } + + [Fact] + public void TestCrossValidationMacroMultiClassWithWarnings() + { + var dataPath = GetDataPath(@"Train-Tiny-28x28.txt"); + string inputGraph = @" + { + 'Nodes': + [{ + 'Name': 'Data.TextLoader', + 'Inputs': { + 'InputFile': '$inputFile', + 'Arguments': { + 'UseThreads': true, + 'HeaderFile': null, + 'MaxRows': null, + 'AllowQuoting': true, + 'AllowSparse': true, + 'InputSize': null, + 'Separator': [ + '\t' + ], + 'Column': null, + 'TrimWhitespace': false, + 'HasHeader': false + } + }, + 'Outputs': { + 'Data': '$Var_48dc3daef3924a22af794e67896272b0' + } + }, 
{ + 'Name': 'Transforms.RowRangeFilter', + 'Inputs': { + 'Column': 'Label', + 'Min': 0.0, + 'Max': 5.0, + 'Complement': false, + 'IncludeMin': true, + 'IncludeMax': null, + 'Data': '$Var_48dc3daef3924a22af794e67896272b0' + }, + 'Outputs': { + 'OutputData': '$Var_64f1865a99b84b9d9e0c72292c14c3af', + 'Model': '$Var_4a34fa76d6d04c14b57b8f146010b9ad' + } + }, { + 'Name': 'Transforms.TextToKeyConverter', + 'Inputs': { + 'Column': [{ + 'MaxNumTerms': null, + 'Term': null, + 'Sort': 'Value', + 'TextKeyValues': null, + 'Name': 'Strat', + 'Source': 'Label' + } + ], + 'MaxNumTerms': 1000000, + 'Term': null, + 'Sort': 'Occurrence', + 'TextKeyValues': false, + 'Data': '$Var_64f1865a99b84b9d9e0c72292c14c3af' + }, + 'Outputs': { + 'OutputData': '$Var_41d7ff9c3dcd45fc869f2691dd628797', + 'Model': '$Var_ee952a378c624306a7b6b8b65dbb8583' + } + }, { + 'Name': 'Models.CrossValidator', + 'Inputs': { + 'Data': '$Var_41d7ff9c3dcd45fc869f2691dd628797', + 'TransformModel': null, + 'Nodes': [{ + 'Name': 'Transforms.NoOperation', + 'Inputs': { + 'Data': '$Var_28af8fabe6dd446a9f20aa97c53c4d4e' + }, + 'Outputs': { + 'OutputData': '$Var_fb8137cb48ac49a7b1b56aa3ed5e0b23', + 'Model': '$Var_2cfc22486a4f475f8dc814feccb08f71' + } + }, { + 'Name': 'Trainers.LogisticRegressionClassifier', + 'Inputs': { + 'ShowTrainingStats': false, + 'L2Weight': 1.0, + 'L1Weight': 1.0, + 'OptTol': 1E-07, + 'MemorySize': 20, + 'MaxIterations': 2147483647, + 'SgdInitializationTolerance': 0.0, + 'Quiet': false, + 'InitWtsDiameter': 0.0, + 'UseThreads': true, + 'NumThreads': 1, + 'DenseOptimizer': false, + 'EnforceNonNegativity': false, + 'WeightColumn': null, + 'LabelColumn': 'Label', + 'TrainingData': '$Var_fb8137cb48ac49a7b1b56aa3ed5e0b23', + 'FeatureColumn': 'Features', + 'NormalizeFeatures': 'Auto', + 'Caching': 'Auto' + }, + 'Outputs': { + 'PredictorModel': '$Var_05e29f93f3bb4c31a93d71e051dfbb2a' + } + } + ], + 'Inputs': { + 'Data': '$Var_28af8fabe6dd446a9f20aa97c53c4d4e' + }, + 'Outputs': { + 'PredictorModel': '$Var_05e29f93f3bb4c31a93d71e051dfbb2a' + }, + 'StratificationColumn': 'Strat', + 'NumFolds': 2, + 'Kind': 'SignatureMultiClassClassifierTrainer', + 'LabelColumn': 'Label', + 'WeightColumn': null, + 'GroupColumn': null, + 'NameColumn': null + }, + 'Outputs': { + 'PredictorModel': '$Var_2df88bffdbca48d5972decf058c26e3b', + 'Warnings': '$warning', + 'OverallMetrics': '$Var_94ec7af856fa4c2aa16f354cf51cee78', + 'PerInstanceMetrics': '$Var_637187e4984f4eed93cd37ab20685867', + 'ConfusionMatrix': '$Var_c5fe1a4fbded49898173662f6be2f6cc' + } + } + ] + } + "; + + JObject graph = JObject.Parse(inputGraph); + var runner = new GraphRunner(Env, graph[FieldNames.Nodes] as JArray); + var inputFile = new SimpleFileHandle(Env, dataPath, false, false); + runner.SetInput("inputFile", inputFile); + runner.RunAll(); + var warnings = runner.GetOutput("warning"); + + var schema = warnings.Schema; + var b = schema.TryGetColumnIndex("WarningText", out int warningCol); + Assert.True(b); + using (var cursor = warnings.GetRowCursor(col => col == warningCol)) + { + var getter = cursor.GetGetter>(warningCol); + + b = cursor.MoveNext(); + Assert.True(b); + var warning = default(ReadOnlyMemory); + getter(ref warning); + Assert.Contains("test instances with class values not seen in the training set.", warning.ToString()); + b = cursor.MoveNext(); + Assert.True(b); + getter(ref warning); + Assert.Contains("Detected columns of variable length: SortedScores, SortedClasses", warning.ToString()); + b = cursor.MoveNext(); + Assert.False(b); + } + } + + [Fact] + public 
void TestCrossValidationMacroWithStratification() + { + var dataPath = GetDataPath(@"breast-cancer.txt"); + string inputGraph = @" + { + 'Nodes': + [{ + 'Name': 'Data.TextLoader', + 'Inputs': { + 'InputFile': '$inputFile', + 'Arguments': { + 'UseThreads': true, + 'HeaderFile': null, + 'MaxRows': null, + 'AllowQuoting': true, + 'AllowSparse': true, + 'InputSize': null, + 'Separator': [ + '\t' + ], + 'Column': [{ + 'Name': 'Label', + 'Type': null, + 'Source': [{ + 'Min': 0, + 'Max': 0, + 'AutoEnd': false, + 'VariableEnd': false, + 'AllOther': false, + 'ForceVector': false + } + ], + 'KeyRange': null + }, { + 'Name': 'Strat', + 'Type': null, + 'Source': [{ + 'Min': 1, + 'Max': 1, + 'AutoEnd': false, + 'VariableEnd': false, + 'AllOther': false, + 'ForceVector': false + } + ], + 'KeyRange': null + }, { + 'Name': 'Features', + 'Type': null, + 'Source': [{ + 'Min': 2, + 'Max': 9, + 'AutoEnd': false, + 'VariableEnd': false, + 'AllOther': false, + 'ForceVector': false + } + ], + 'KeyRange': null + } + ], + 'TrimWhitespace': false, + 'HasHeader': false + } + }, + 'Outputs': { + 'Data': '$Var_95d56835dc384629bd288ea0a8879277' + } + }, { + 'Name': 'Models.CrossValidator', + 'Inputs': { + 'Data': '$Var_95d56835dc384629bd288ea0a8879277', + 'TransformModel': null, + 'Nodes': [{ + 'Name': 'Transforms.NoOperation', + 'Inputs': { + 'Data': '$Var_e02622de697b478e9b7d84a5220fee8c' + }, + 'Outputs': { + 'OutputData': '$Var_44f5c60e439b49fe9e5bf372be4613ee', + 'Model': '$Var_14976738a67940a58cfeffdf795a74c1' + } + }, { + 'Name': 'Trainers.StochasticDualCoordinateAscentBinaryClassifier', + 'Inputs': { + 'LossFunction': { + 'Name': 'LogLoss', + 'Settings': {} + }, + 'PositiveInstanceWeight': 1.0, + 'Calibrator': { + 'Name': 'PlattCalibrator', + 'Settings': {} + }, + 'MaxCalibrationExamples': 1000000, + 'L2Const': null, + 'L1Threshold': null, + 'NumThreads': 1, + 'ConvergenceTolerance': 0.1, + 'MaxIterations': null, + 'Shuffle': true, + 'CheckFrequency': null, + 'BiasLearningRate': 0.0, + 'LabelColumn': 'Label', + 'TrainingData': '$Var_44f5c60e439b49fe9e5bf372be4613ee', + 'FeatureColumn': 'Features', + 'NormalizeFeatures': 'Auto', + 'Caching': 'Auto' + }, + 'Outputs': { + 'PredictorModel': '$Var_d0c2303905c146b6873693e58ed6e2aa' + } + }, { + 'Name': 'Transforms.ManyHeterogeneousModelCombiner', + 'Inputs': { + 'TransformModels': [ + '$Var_14976738a67940a58cfeffdf795a74c1' + ], + 'PredictorModel': '$Var_d0c2303905c146b6873693e58ed6e2aa' + }, + 'Outputs': { + 'PredictorModel': '$Var_250e906783ab442e85af77298c531199' + } + } + ], + 'Inputs': { + 'Data': '$Var_e02622de697b478e9b7d84a5220fee8c' + }, + 'Outputs': { + 'PredictorModel': '$Var_250e906783ab442e85af77298c531199' + }, + 'StratificationColumn': 'Strat', + 'NumFolds': 2, + 'Kind': 'SignatureBinaryClassifierTrainer', + 'LabelColumn': 'Label', + 'WeightColumn': null, + 'GroupColumn': null, + 'NameColumn': null + }, + 'Outputs': { + 'PredictorModel': '$Var_c824c370674e4c358012ca07e04ee79e', + 'Warnings': '$Var_4f7a5c14043247fdb53ea3a264afcb6f', + 'OverallMetrics': '$overallmetrics', + 'PerInstanceMetrics': '$Var_1fe20a06e4a14215bc09ba8ff7ae603b', + 'ConfusionMatrix': '$Var_d159331c1bca445792a37ddd143b3a25' + } + } + ] + } + "; + + JObject graph = JObject.Parse(inputGraph); + var runner = new GraphRunner(Env, graph[FieldNames.Nodes] as JArray); + var inputFile = new SimpleFileHandle(Env, dataPath, false, false); + runner.SetInput("inputFile", inputFile); + runner.RunAll(); + var data = runner.GetOutput("overallmetrics"); + + var schema = data.Schema; + var b = 
schema.TryGetColumnIndex("AUC", out int metricCol); + Assert.True(b); + b = schema.TryGetColumnIndex("Fold Index", out int foldCol); + Assert.True(b); + using (var cursor = data.GetRowCursor(col => col == metricCol || col == foldCol)) + { + var getter = cursor.GetGetter<double>(metricCol); + var foldGetter = cursor.GetGetter<ReadOnlyMemory<char>>(foldCol); + ReadOnlyMemory<char> fold = default; + + // Get the average. + b = cursor.MoveNext(); + Assert.True(b); + double avg = 0; + getter(ref avg); + foldGetter(ref fold); + Assert.True(ReadOnlyMemoryUtils.EqualsStr("Average", fold)); + + // Get the standard deviation. + b = cursor.MoveNext(); + Assert.True(b); + double stdev = 0; + getter(ref stdev); + foldGetter(ref fold); + Assert.True(ReadOnlyMemoryUtils.EqualsStr("Standard Deviation", fold)); + Assert.Equal(0.00481, stdev, 5); + + double sum = 0; + double val = 0; + for (int f = 0; f < 2; f++) + { + b = cursor.MoveNext(); + Assert.True(b); + getter(ref val); + foldGetter(ref fold); + sum += val; + Assert.True(ReadOnlyMemoryUtils.EqualsStr("Fold " + f, fold)); + } + Assert.Equal(avg, sum / 2); + b = cursor.MoveNext(); + Assert.False(b); + } + } + + [Fact] + public void TestCrossValidationMacroWithNonDefaultNames() + { + string dataPath = GetDataPath(@"adult.tiny.with-schema.txt"); + string inputGraph = @" + { + 'Nodes': [{ + 'Name': 'Data.TextLoader', + 'Inputs': { + 'InputFile': '$inputFile', + 'Arguments': { + 'UseThreads': true, + 'HeaderFile': null, + 'MaxRows': null, + 'AllowQuoting': true, + 'AllowSparse': true, + 'InputSize': null, + 'Separator': [ + '\t' + ], + 'Column': [{ + 'Name': 'Label', + 'Type': null, + 'Source': [{ + 'Min': 0, + 'Max': 0, + 'AutoEnd': false, + 'VariableEnd': false, + 'AllOther': false, + 'ForceVector': false + } + ], + 'KeyRange': null + }, { + 'Name': 'Workclass', + 'Type': 'TX', + 'Source': [{ + 'Min': 1, + 'Max': 1, + 'AutoEnd': false, + 'VariableEnd': false, + 'AllOther': false, + 'ForceVector': false + } + ], + 'KeyRange': null + }, { + 'Name': 'Features', + 'Type': null, + 'Source': [{ + 'Min': 9, + 'Max': 14, + 'AutoEnd': false, + 'VariableEnd': false, + 'AllOther': false, + 'ForceVector': false + } + ], + 'KeyRange': null + } + ], + 'TrimWhitespace': false, + 'HasHeader': true + } + }, + 'Outputs': { + 'Data': '$Var_bfb5ef5be6f547de88af2409c8c35443' + } + }, { + 'Name': 'Models.CrossValidator', + 'Inputs': { + 'Data': '$Var_bfb5ef5be6f547de88af2409c8c35443', + 'TransformModel': null, + 'Nodes': [{ + 'Name': 'Transforms.TextToKeyConverter', + 'Inputs': { + 'Column': [{ + 'MaxNumTerms': null, + 'Term': null, + 'Sort': null, + 'TextKeyValues': null, + 'Name': 'Label1', + 'Source': 'Label' + } + ], + 'MaxNumTerms': 1000000, + 'Term': null, + 'Sort': 'Occurrence', + 'TextKeyValues': false, + 'Data': '$Var_48d35aae527f439398805f51e5f0cfab' + }, + 'Outputs': { + 'OutputData': '$Var_44ac8ba819da483089dacc0f12bae3d6', + 'Model': '$Var_2039a1ba743549c1989de460c105b354' + } + }, { + 'Name': 'Transforms.HashConverter', + 'Inputs': { + 'Column': [{ + 'Join': null, + 'CustomSlotMap': null, + 'HashBits': null, + 'Seed': null, + 'Ordered': null, + 'Name': 'GroupId1', + 'Source': 'Workclass' + } + ], + 'Join': true, + 'HashBits': 31, + 'Seed': 314489979, + 'Ordered': true, + 'Data': '$Var_44ac8ba819da483089dacc0f12bae3d6' + }, + 'Outputs': { + 'OutputData': '$Var_8f51ed90f5b642b2a80eeb628d67a5b3', + 'Model': '$Var_5d04d6405abb40ed9efb0486c2e1688b' + } + }, { + 'Name': 'Trainers.FastTreeRanker', + 'Inputs': { + 'CustomGains': '0,3,7,15,31', + 'TrainDcg': false, + 'SortingAlgorithm':
'DescendingStablePessimistic', + 'LambdaMartMaxTruncation': 100, + 'ShiftedNdcg': false, + 'CostFunctionParam': 'w', + 'DistanceWeight2': false, + 'NormalizeQueryLambdas': false, + 'BestStepRankingRegressionTrees': false, + 'UseLineSearch': false, + 'NumPostBracketSteps': 0, + 'MinStepSize': 0.0, + 'OptimizationAlgorithm': 'GradientDescent', + 'EarlyStoppingRule': null, + 'EarlyStoppingMetrics': 1, + 'EnablePruning': false, + 'UseTolerantPruning': false, + 'PruningThreshold': 0.004, + 'PruningWindowSize': 5, + 'LearningRates': 0.2, + 'Shrinkage': 1.0, + 'DropoutRate': 0.0, + 'GetDerivativesSampleRate': 1, + 'WriteLastEnsemble': false, + 'MaxTreeOutput': 100.0, + 'RandomStart': false, + 'FilterZeroLambdas': false, + 'BaselineScoresFormula': null, + 'BaselineAlphaRisk': null, + 'PositionDiscountFreeform': null, + 'ParallelTrainer': { + 'Name': 'Single', + 'Settings': {} + }, + 'NumThreads': 1, + 'RngSeed': 123, + 'FeatureSelectSeed': 123, + 'EntropyCoefficient': 0.0, + 'HistogramPoolSize': -1, + 'DiskTranspose': null, + 'FeatureFlocks': true, + 'CategoricalSplit': false, + 'MaxCategoricalGroupsPerNode': 64, + 'MaxCategoricalSplitPoints': 64, + 'MinDocsPercentageForCategoricalSplit': 0.001, + 'MinDocsForCategoricalSplit': 100, + 'Bias': 0.0, + 'Bundling': 'None', + 'MaxBins': 255, + 'SparsifyThreshold': 0.7, + 'FeatureFirstUsePenalty': 0.0, + 'FeatureReusePenalty': 0.0, + 'GainConfidenceLevel': 0.0, + 'SoftmaxTemperature': 0.0, + 'ExecutionTimes': false, + 'NumLeaves': 20, + 'MinDocumentsInLeafs': 10, + 'NumTrees': 100, + 'FeatureFraction': 1.0, + 'BaggingSize': 0, + 'BaggingTrainFraction': 0.7, + 'SplitFraction': 1.0, + 'Smoothing': 0.0, + 'AllowEmptyTrees': true, + 'FeatureCompressionLevel': 1, + 'CompressEnsemble': false, + 'MaxTreesAfterCompression': -1, + 'PrintTestGraph': false, + 'PrintTrainValidGraph': false, + 'TestFrequency': 2147483647, + 'GroupIdColumn': { + 'Value': 'GroupId1', + 'IsExplicit': true + }, + 'WeightColumn': null, + 'LabelColumn': 'Label1', + 'TrainingData': '$Var_8f51ed90f5b642b2a80eeb628d67a5b3', + 'FeatureColumn': 'Features', + 'NormalizeFeatures': 'Auto', + 'Caching': 'Auto' + }, + 'Outputs': { + 'PredictorModel': '$Var_53eb1dedb8234950affa64daaa770427' + } + }, { + 'Name': 'Transforms.ManyHeterogeneousModelCombiner', + 'Inputs': { + 'TransformModels': [ + '$Var_2039a1ba743549c1989de460c105b354', + '$Var_5d04d6405abb40ed9efb0486c2e1688b' + ], + 'PredictorModel': '$Var_53eb1dedb8234950affa64daaa770427' + }, + 'Outputs': { + 'PredictorModel': '$Var_dd7bc37a393741aea36b46ed609c72a1' + } + } + ], + 'Inputs': { + 'Data': '$Var_48d35aae527f439398805f51e5f0cfab' + }, + 'Outputs': { + 'PredictorModel': '$Var_dd7bc37a393741aea36b46ed609c72a1' + }, + 'StratificationColumn': null, + 'NumFolds': 2, + 'Kind': 'SignatureRankerTrainer', + 'LabelColumn': 'Label1', + 'WeightColumn': null, + 'GroupColumn': { + 'Value': 'GroupId1', + 'IsExplicit': true + }, + 'NameColumn': { + 'Value': 'Workclass', + 'IsExplicit': true + } + }, + 'Outputs': { + 'PredictorModel': '$Var_48c4f33d44c1437bb792a8640703e21e', + 'Warnings': '$Var_8f3381cecb1b48dda606a58d153dc022', + 'OverallMetrics': '$overallMetrics', + 'PerInstanceMetrics': '$perInstanceMetric', + 'ConfusionMatrix': '$Var_a06599dcbf52480a8dbb5f7414ee08fe' + } + } + ] + }"; + + JObject graph = JObject.Parse(inputGraph); + var runner = new GraphRunner(Env, graph[FieldNames.Nodes] as JArray); + var inputFile = new SimpleFileHandle(Env, dataPath, false, false); + runner.SetInput("inputFile", inputFile); + runner.RunAll(); + + var data = 
runner.GetOutput("overallMetrics"); + + var schema = data.Schema; + var b = schema.TryGetColumnIndex("NDCG", out int metricCol); + Assert.True(b); + b = schema.TryGetColumnIndex("Fold Index", out int foldCol); + Assert.True(b); + using (var cursor = data.GetRowCursor(col => col == metricCol || col == foldCol)) + { + var getter = cursor.GetGetter<VBuffer<double>>(metricCol); + var foldGetter = cursor.GetGetter<ReadOnlyMemory<char>>(foldCol); + ReadOnlyMemory<char> fold = default; + + // Get the average. + b = cursor.MoveNext(); + Assert.True(b); + var avg = default(VBuffer<double>); + getter(ref avg); + foldGetter(ref fold); + Assert.True(ReadOnlyMemoryUtils.EqualsStr("Average", fold)); + + // Get the standard deviation. + b = cursor.MoveNext(); + Assert.True(b); + var stdev = default(VBuffer<double>); + getter(ref stdev); + foldGetter(ref fold); + Assert.True(ReadOnlyMemoryUtils.EqualsStr("Standard Deviation", fold)); + var stdevValues = stdev.GetValues(); + Assert.Equal(2.462, stdevValues[0], 3); + Assert.Equal(2.763, stdevValues[1], 3); + Assert.Equal(3.273, stdevValues[2], 3); + + var sumBldr = new BufferBuilder<double>(R8Adder.Instance); + sumBldr.Reset(avg.Length, true); + var val = default(VBuffer<double>); + for (int f = 0; f < 2; f++) + { + b = cursor.MoveNext(); + Assert.True(b); + getter(ref val); + foldGetter(ref fold); + sumBldr.AddFeatures(0, in val); + Assert.True(ReadOnlyMemoryUtils.EqualsStr("Fold " + f, fold)); + } + var sum = default(VBuffer<double>); + sumBldr.GetResult(ref sum); + + var avgValues = avg.GetValues(); + var sumValues = sum.GetValues(); + for (int i = 0; i < avgValues.Length; i++) + Assert.Equal(avgValues[i], sumValues[i] / 2); + b = cursor.MoveNext(); + Assert.False(b); + } + + data = runner.GetOutput("perInstanceMetric"); + Assert.True(data.Schema.TryGetColumnIndex("Instance", out int nameCol)); + using (var cursor = data.GetRowCursor(col => col == nameCol)) + { + var getter = cursor.GetGetter<ReadOnlyMemory<char>>(nameCol); + while (cursor.MoveNext()) + { + ReadOnlyMemory<char> name = default; + getter(ref name); + Assert.Subset(new HashSet<string>() { "Private", "?", "Federal-gov" }, new HashSet<string>() { name.ToString() }); + if (cursor.Position > 4) + break; + } + } + } + + [Fact] + public void TestOvaMacro() + { + var dataPath = GetDataPath(@"iris.txt"); + string inputGraph = @" + { + 'Nodes': + [{ + 'Name': 'Data.TextLoader', + 'Inputs': { + 'InputFile': '$inputFile', + 'Arguments': { + 'UseThreads': true, + 'HeaderFile': null, + 'MaxRows': null, + 'AllowQuoting': true, + 'AllowSparse': true, + 'InputSize': null, + 'Separator': [ + '\t' + ], + 'Column': [{ + 'Name': 'Label', + 'Type': null, + 'Source': [{ + 'Min': 0, + 'Max': 0, + 'AutoEnd': false, + 'VariableEnd': false, + 'AllOther': false, + 'ForceVector': false + } + ], + 'KeyRange': null + }, { + 'Name': 'Features', + 'Type': null, + 'Source': [{ + 'Min': 1, + 'Max': 4, + 'AutoEnd': false, + 'VariableEnd': false, + 'AllOther': false, + 'ForceVector': false + } + ], + 'KeyRange': null + } + ], + 'TrimWhitespace': false, + 'HasHeader': false + } + }, + 'Outputs': { + 'Data': '$Var_672f860e44304ba8bd1c1a6e4b5ba9c5' + } + }, { + 'Name': 'Models.OneVersusAll', + 'Inputs': { + 'Nodes': [{ + 'Name': 'Trainers.StochasticDualCoordinateAscentBinaryClassifier', + 'Inputs': { + 'LossFunction': { + 'Name': 'LogLoss', + 'Settings': {} + }, + 'PositiveInstanceWeight': 1.0, + 'Calibrator': { + 'Name': 'PlattCalibrator', + 'Settings': {} + }, + 'MaxCalibrationExamples': 1000000, + 'L2Const': null, + 'L1Threshold': null, + 'NumThreads': 1, + 'ConvergenceTolerance': 0.1, + 'MaxIterations': null, + 'Shuffle': true, + 'CheckFrequency':
null, + 'BiasLearningRate': 0.0, + 'LabelColumn': 'Label', + 'TrainingData': '$Var_9aa1732198964d7f979a0bbec5db66c2', + 'FeatureColumn': 'Features', + 'NormalizeFeatures': 'Auto', + 'Caching': 'Auto' + }, + 'Outputs': { + 'PredictorModel': '$Var_6219b70478204e599cef4ab3672656ff' + } + } + ], + 'OutputForSubGraph': { + 'Model': '$Var_a229f40df6494a93a794ffd5480d5549' + }, + 'UseProbabilities': true, + 'WeightColumn': null, + 'LabelColumn': 'Label', + 'TrainingData': '$Var_672f860e44304ba8bd1c1a6e4b5ba9c5', + 'FeatureColumn': 'Features', + 'NormalizeFeatures': 'Auto', + 'Caching': 'Auto' + }, + 'Outputs': { + 'PredictorModel': '$Var_a8423859a7994667b7f1075f8b7b0194' + } + }, { + 'Name': 'Transforms.DatasetScorer', + 'Inputs': { + 'Data': '$Var_672f860e44304ba8bd1c1a6e4b5ba9c5', + 'PredictorModel': '$Var_a8423859a7994667b7f1075f8b7b0194', + 'Suffix': null + }, + 'Outputs': { + 'ScoredData': '$Var_5454fd8c353c40288dd8c2d104be788f', + 'ScoringTransform': '$Var_df35ea0b6e814ed5a1d8f9d673a663b1' + } + }, { + 'Name': 'Models.ClassificationEvaluator', + 'Inputs': { + 'OutputTopKAcc': null, + 'NumTopClassesToOutput': 3, + 'NumClassesConfusionMatrix': 10, + 'OutputPerClassStatistics': false, + 'LabelColumn': null, + 'WeightColumn': null, + 'ScoreColumn': null, + 'StratColumn': null, + 'Data': '$Var_5454fd8c353c40288dd8c2d104be788f', + 'NameColumn': 'Name' + }, + 'Outputs': { + 'ConfusionMatrix': '$Var_ed441dd1ebcc46f7bf7e096d18b33fd7', + 'Warnings': '$Var_bec6f9da6bd647808c4a7a05b7e8b1be', + 'OverallMetrics': '$overallMetrics', + 'PerInstanceMetrics': '$Var_cfcc191521dd45c58ed6654ced067a28' + } + } + ] + } + "; + JObject graph = JObject.Parse(inputGraph); + var runner = new GraphRunner(Env, graph[FieldNames.Nodes] as JArray); + var inputFile = new SimpleFileHandle(Env, dataPath, false, false); + runner.SetInput("inputFile", inputFile); + runner.RunAll(); + + var data = runner.GetOutput("overallMetrics"); + var schema = data.Schema; + var b = schema.TryGetColumnIndex(MultiClassClassifierEvaluator.AccuracyMacro, out int accCol); + Assert.True(b); + using (var cursor = data.GetRowCursor(col => col == accCol)) + { + var getter = cursor.GetGetter(accCol); + b = cursor.MoveNext(); + Assert.True(b); + double acc = 0; + getter(ref acc); + Assert.Equal(0.96, acc, 2); + b = cursor.MoveNext(); + Assert.False(b); + } + } + + [Fact] + public void TestOvaMacroWithUncalibratedLearner() + { + var dataPath = GetDataPath(@"iris.txt"); + string inputGraph = @" + { + 'Nodes': + [{ + 'Name': 'Data.TextLoader', + 'Inputs': { + 'InputFile': '$inputFile', + 'Arguments': { + 'UseThreads': true, + 'HeaderFile': null, + 'MaxRows': null, + 'AllowQuoting': true, + 'AllowSparse': true, + 'InputSize': null, + 'Separator': [ + '\t' + ], + 'Column': [{ + 'Name': 'Label', + 'Type': null, + 'Source': [{ + 'Min': 0, + 'Max': 0, + 'AutoEnd': false, + 'VariableEnd': false, + 'AllOther': false, + 'ForceVector': false + } + ], + 'KeyRange': null + }, { + 'Name': 'Features', + 'Type': null, + 'Source': [{ + 'Min': 1, + 'Max': 4, + 'AutoEnd': false, + 'VariableEnd': false, + 'AllOther': false, + 'ForceVector': false + } + ], + 'KeyRange': null + } + ], + 'TrimWhitespace': false, + 'HasHeader': false + } + }, + 'Outputs': { + 'Data': '$Var_f38b99289df746319edd57a3ccfb85a2' + } + }, { + 'Name': 'Models.OneVersusAll', + 'Inputs': { + 'Nodes': [{ + 'Name': 'Trainers.AveragedPerceptronBinaryClassifier', + 'Inputs': { + 'LossFunction': { + 'Name': 'HingeLoss', + 'Settings': { + 'Margin': 1.0 + } + }, + 'Calibrator': { + 'Name': 'PlattCalibrator', 
+ 'Settings': {} + }, + 'MaxCalibrationExamples': 1000000, + 'LearningRate': 1.0, + 'DecreaseLearningRate': false, + 'ResetWeightsAfterXExamples': null, + 'DoLazyUpdates': true, + 'L2RegularizerWeight': 0.0, + 'RecencyGain': 0.0, + 'RecencyGainMulti': false, + 'Averaged': true, + 'AveragedTolerance': 0.01, + 'NumIterations': 1, + 'InitialWeights': null, + 'InitWtsDiameter': 0.0, + 'Shuffle': false, + 'StreamingCacheSize': 1000000, + 'LabelColumn': 'Label', + 'TrainingData': '$Var_9ccc8bce4f6540eb8a244ab40585602a', + 'FeatureColumn': 'Features', + 'NormalizeFeatures': 'Auto', + 'Caching': 'Auto' + }, + 'Outputs': { + 'PredictorModel': '$Var_4f1c140c153e4b5fb03fbe3ffb97a68b' + } + } + ], + 'OutputForSubGraph': { + 'Model': '$Var_b47f7facc1c540e39d8b82ab64df6592' + }, + 'UseProbabilities': true, + 'WeightColumn': null, + 'LabelColumn': 'Label', + 'TrainingData': '$Var_f38b99289df746319edd57a3ccfb85a2', + 'FeatureColumn': 'Features', + 'NormalizeFeatures': 'Auto', + 'Caching': 'Auto' + }, + 'Outputs': { + 'PredictorModel': '$Var_d67eb393a0e849c2962961c174eab3da' + } + }, { + 'Name': 'Transforms.DatasetScorer', + 'Inputs': { + 'Data': '$Var_f38b99289df746319edd57a3ccfb85a2', + 'PredictorModel': '$Var_d67eb393a0e849c2962961c174eab3da', + 'Suffix': null + }, + 'Outputs': { + 'ScoredData': '$Var_a20e37dc58d84bf5a1cb13ed13eae5ba', + 'ScoringTransform': '$Var_49f9a4a57ff043cda5947704678241a0' + } + }, { + 'Name': 'Models.ClassificationEvaluator', + 'Inputs': { + 'OutputTopKAcc': null, + 'NumTopClassesToOutput': 3, + 'NumClassesConfusionMatrix': 10, + 'OutputPerClassStatistics': false, + 'LabelColumn': null, + 'WeightColumn': null, + 'ScoreColumn': null, + 'StratColumn': null, + 'Data': '$Var_a20e37dc58d84bf5a1cb13ed13eae5ba', + 'NameColumn': 'Name' + }, + 'Outputs': { + 'ConfusionMatrix': '$Var_7db29303b67942e2a6267c20b9c4be77', + 'Warnings': '$Var_7751126378244c2385940cdf5a0e76e6', + 'OverallMetrics': '$overallMetrics', + 'PerInstanceMetrics': '$Var_67109dcdce504a0894a5c2f5616d21f9' + } + } + ] + } + "; + + JObject graph = JObject.Parse(inputGraph); + var runner = new GraphRunner(Env, graph[FieldNames.Nodes] as JArray); + var inputFile = new SimpleFileHandle(Env, dataPath, false, false); + runner.SetInput("inputFile", inputFile); + runner.RunAll(); + + var data = runner.GetOutput("overallMetrics"); + var schema = data.Schema; + var b = schema.TryGetColumnIndex(MultiClassClassifierEvaluator.AccuracyMacro, out int accCol); + Assert.True(b); + using (var cursor = data.GetRowCursor(col => col == accCol)) + { + var getter = cursor.GetGetter(accCol); + b = cursor.MoveNext(); + Assert.True(b); + double acc = 0; + getter(ref acc); + Assert.Equal(0.71, acc, 2); + b = cursor.MoveNext(); + Assert.False(b); + } + } + + [ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // TensorFlow is 64-bit only + public void TestTensorFlowEntryPoint() + { + var dataPath = GetDataPath("Train-Tiny-28x28.txt"); + Env.ComponentCatalog.RegisterAssembly(typeof(TensorFlowTransform).Assembly); + string inputGraph = @" + { + 'Nodes': + [{ + 'Name': 'Data.TextLoader', + 'Inputs': { + 'InputFile': '$inputFile', + 'Arguments': { + 'UseThreads': true, + 'HeaderFile': null, + 'MaxRows': null, + 'AllowQuoting': true, + 'AllowSparse': true, + 'InputSize': null, + 'Separator': [ + '\t' + ], + 'Column': [{ + 'Name': 'Label', + 'Type': null, + 'Source': [{ + 'Min': 0, + 'Max': 0, + 'AutoEnd': false, + 'VariableEnd': false, + 'AllOther': false, + 'ForceVector': false + } + ], + 'KeyRange': null + }, { + 'Name': 
'Placeholder', + 'Type': null, + 'Source': [{ + 'Min': 1, + 'Max': 784, + 'AutoEnd': false, + 'VariableEnd': false, + 'AllOther': false, + 'ForceVector': false + } + ], + 'KeyRange': null + } + ], + 'TrimWhitespace': false, + 'HasHeader': false + } + }, + 'Outputs': { + 'Data': '$Var_2802f3e485814063828c2303ec60327c' + } + }, { + 'Name': 'Transforms.TensorFlowScorer', + 'Inputs': { + 'ModelLocation': 'mnist_model/frozen_saved_model.pb', + 'InputColumns': [ + 'Placeholder' + ], + 'OutputColumns': [ + 'Softmax' + ], + 'LabelColumn': null, + 'TensorFlowLabel': null, + 'OptimizationOperation': null, + 'LossOperation': null, + 'MetricOperation': null, + 'BatchSize': 64, + 'Epoch': 5, + 'LearningRateOperation': null, + 'LearningRate': 0.01, + 'SaveLocationOperation': 'save/Const', + 'SaveOperation': 'save/control_dependency', + 'ReTrain': false, + 'Data': '$Var_2802f3e485814063828c2303ec60327c' + }, + 'Outputs': { + 'OutputData': '$outputData', + 'Model': '$Var_c3a191a107c54725acc49e432bfdf104' + } + } + ] + } + "; + JObject graph = JObject.Parse(inputGraph); + var runner = new GraphRunner(Env, graph[FieldNames.Nodes] as JArray); + var inputFile = new SimpleFileHandle(Env, dataPath, false, false); + runner.SetInput("inputFile", inputFile); + runner.RunAll(); + + var data = runner.GetOutput("outputData"); + + var schema = data.Schema; + Assert.Equal(3, schema.Count); + Assert.Equal("Softmax", schema[2].Name); + Assert.Equal(10, (schema[2].Type as VectorType)?.Size); + } } -#pragma warning restore 612 } \ No newline at end of file diff --git a/test/Microsoft.ML.FSharp.Tests/Microsoft.ML.FSharp.Tests.fsproj b/test/Microsoft.ML.FSharp.Tests/Microsoft.ML.FSharp.Tests.fsproj index bdf496e780..d3db320af4 100644 --- a/test/Microsoft.ML.FSharp.Tests/Microsoft.ML.FSharp.Tests.fsproj +++ b/test/Microsoft.ML.FSharp.Tests/Microsoft.ML.FSharp.Tests.fsproj @@ -22,20 +22,8 @@ - - - - - - - - - - - - @@ -43,4 +31,4 @@ - + \ No newline at end of file diff --git a/test/Microsoft.ML.FSharp.Tests/SmokeTests.fs b/test/Microsoft.ML.FSharp.Tests/SmokeTests.fs index fbd9a468ba..df0bfd2185 100644 --- a/test/Microsoft.ML.FSharp.Tests/SmokeTests.fs +++ b/test/Microsoft.ML.FSharp.Tests/SmokeTests.fs @@ -56,19 +56,16 @@ namespace Microsoft.ML.FSharp.Tests #nowarn "44" open System open Microsoft.ML -open Microsoft.ML.Legacy.Data -open Microsoft.ML.Legacy.Trainers -open Microsoft.ML.Legacy.Transforms open Microsoft.ML.Data open Xunit module SmokeTest1 = type SentimentData() = - [] + [] + val mutable Sentiment : bool + [] val mutable SentimentText : string - [] - val mutable Sentiment : float32 type SentimentPrediction() = [] @@ -77,51 +74,23 @@ module SmokeTest1 = [] let ``FSharp-Sentiment-Smoke-Test`` () = - // See https://github.com/dotnet/machinelearning/issues/401: forces the loading of ML.NET component assemblies - let _load = - [ typeof; - typeof; - typeof] // ML.EntryPoints - let testDataPath = __SOURCE_DIRECTORY__ + @"/../data/wikipedia-detox-250-line-data.tsv" - let pipeline = Legacy.LearningPipeline() - - pipeline.Add( - Microsoft.ML.Legacy.Data.TextLoader(testDataPath).CreateFrom( - Arguments = - TextLoaderArguments( - HasHeader = true, - Column = [| TextLoaderColumn(Name = "Label", - Source = [| TextLoaderRange(0) |], - Type = Nullable (Legacy.Data.DataKind.Num)) - TextLoaderColumn(Name = "SentimentText", - Source = [| TextLoaderRange(1) |], - Type = Nullable (Legacy.Data.DataKind.Text)) |] - ))) - - pipeline.Add( - TextFeaturizer( - "Features", [| "SentimentText" |], - KeepPunctuations = false, - OutputTokens = 
true, - VectorNormalizer = TextFeaturizingEstimatorTextNormKind.L2 - )) - - pipeline.Add( - FastTreeBinaryClassifier( - NumLeaves = 5, - NumTrees = 5, - MinDocumentsInLeafs = 2 - )) - - let model = pipeline.Train() + let ml = MLContext(seed = new System.Nullable(1), conc = 1) + let data = ml.Data.ReadFromTextFile(testDataPath, hasHeader = true) + + let pipeline = ml.Transforms.Text.FeaturizeText("SentimentText", "Features") + .Append(ml.BinaryClassification.Trainers.FastTree(numLeaves = 5, numTrees = 5)) + + let model = pipeline.Fit(data) + let engine = model.CreatePredictionEngine(ml) + let predictions = [ SentimentData(SentimentText = "This is a gross exaggeration. Nobody is setting a kangaroo court. There was a simple addition.") SentimentData(SentimentText = "Sort of ok") SentimentData(SentimentText = "Joe versus the Volcano Coffee Company is a great film.") ] - |> model.Predict + |> List.map engine.Predict let predictionResults = [ for p in predictions -> p.Sentiment ] Assert.Equal(predictionResults, [ false; true; true ]) @@ -131,11 +100,11 @@ module SmokeTest2 = [] type SentimentData = - { [] - SentimentText : string - - [] - Sentiment : float32 } + { [] + Sentiment : bool + + [] + SentimentText : string } [] type SentimentPrediction = @@ -145,51 +114,23 @@ module SmokeTest2 = [] let ``FSharp-Sentiment-Smoke-Test`` () = - // See https://github.com/dotnet/machinelearning/issues/401: forces the loading of ML.NET component assemblies - let _load = - [ typeof; - typeof; - typeof] // ML.EntryPoints - let testDataPath = __SOURCE_DIRECTORY__ + @"/../data/wikipedia-detox-250-line-data.tsv" + + let ml = MLContext(seed = new System.Nullable(1), conc = 1) + let data = ml.Data.ReadFromTextFile(testDataPath, hasHeader = true) + + let pipeline = ml.Transforms.Text.FeaturizeText("SentimentText", "Features") + .Append(ml.BinaryClassification.Trainers.FastTree(numLeaves = 5, numTrees = 5)) + + let model = pipeline.Fit(data) - let pipeline = Legacy.LearningPipeline() - - pipeline.Add( - Microsoft.ML.Legacy.Data.TextLoader(testDataPath).CreateFrom( - Arguments = - TextLoaderArguments( - HasHeader = true, - Column = [| TextLoaderColumn(Name = "Label", - Source = [| TextLoaderRange(0) |], - Type = Nullable (Legacy.Data.DataKind.Num)) - TextLoaderColumn(Name = "SentimentText", - Source = [| TextLoaderRange(1) |], - Type = Nullable (Legacy.Data.DataKind.Text)) |] - ))) - - pipeline.Add( - TextFeaturizer( - "Features", [| "SentimentText" |], - KeepPunctuations = false, - OutputTokens = true, - VectorNormalizer = TextFeaturizingEstimatorTextNormKind.L2 - )) - - pipeline.Add( - FastTreeBinaryClassifier( - NumLeaves = 5, - NumTrees = 5, - MinDocumentsInLeafs = 2 - )) - - let model = pipeline.Train() + let engine = model.CreatePredictionEngine(ml) let predictions = - [ { SentimentText = "This is a gross exaggeration. Nobody is setting a kangaroo court. There was a simple addition."; Sentiment = 0.0f } - { SentimentText = "Sort of ok"; Sentiment = 0.0f } - { SentimentText = "Joe versus the Volcano Coffee Company is a great film."; Sentiment = 0.0f } ] - |> model.Predict + [ { SentimentText = "This is a gross exaggeration. Nobody is setting a kangaroo court. 
There was a simple addition."; Sentiment = false } + { SentimentText = "Sort of ok"; Sentiment = false } + { SentimentText = "Joe versus the Volcano Coffee Company is a great film."; Sentiment = false } ] + |> List.map engine.Predict let predictionResults = [ for p in predictions -> p.Sentiment ] Assert.Equal(predictionResults, [ false; true; true ]) @@ -197,11 +138,11 @@ module SmokeTest2 = module SmokeTest3 = type SentimentData() = - [] - member val SentimentText = "".AsMemory() with get, set + [] + member val Sentiment = false with get, set - [] - member val Sentiment = 0.0 with get, set + [] + member val SentimentText = "".AsMemory() with get, set type SentimentPrediction() = [] @@ -210,51 +151,23 @@ module SmokeTest3 = [] let ``FSharp-Sentiment-Smoke-Test`` () = - // See https://github.com/dotnet/machinelearning/issues/401: forces the loading of ML.NET component assemblies - let _load = - [ typeof; - typeof; - typeof] // ML.EntryPoints - let testDataPath = __SOURCE_DIRECTORY__ + @"/../data/wikipedia-detox-250-line-data.tsv" - let pipeline = Legacy.LearningPipeline() - - pipeline.Add( - Microsoft.ML.Legacy.Data.TextLoader(testDataPath).CreateFrom( - Arguments = - TextLoaderArguments( - HasHeader = true, - Column = [| TextLoaderColumn(Name = "Label", - Source = [| TextLoaderRange(0) |], - Type = Nullable (Legacy.Data.DataKind.Num)) - TextLoaderColumn(Name = "SentimentText", - Source = [| TextLoaderRange(1) |], - Type = Nullable (Legacy.Data.DataKind.Text)) |] - ))) - - pipeline.Add( - TextFeaturizer( - "Features", [| "SentimentText" |], - KeepPunctuations = false, - OutputTokens = true, - VectorNormalizer = TextFeaturizingEstimatorTextNormKind.L2 - )) - - pipeline.Add( - FastTreeBinaryClassifier( - NumLeaves = 5, - NumTrees = 5, - MinDocumentsInLeafs = 2 - )) - - let model = pipeline.Train() + let ml = MLContext(seed = new System.Nullable(1), conc = 1) + let data = ml.Data.ReadFromTextFile(testDataPath, hasHeader = true) + + let pipeline = ml.Transforms.Text.FeaturizeText("SentimentText", "Features") + .Append(ml.BinaryClassification.Trainers.FastTree(numLeaves = 5, numTrees = 5)) + + let model = pipeline.Fit(data) + + let engine = model.CreatePredictionEngine(ml) let predictions = [ SentimentData(SentimentText = "This is a gross exaggeration. Nobody is setting a kangaroo court. 
There was a simple addition.".AsMemory()) SentimentData(SentimentText = "Sort of ok".AsMemory()) SentimentData(SentimentText = "Joe versus the Volcano Coffee Company is a great film.".AsMemory()) ] - |> model.Predict + |> List.map engine.Predict let predictionResults = [ for p in predictions -> p.Sentiment ] Assert.Equal(predictionResults, [ false; true; true ]) diff --git a/test/Microsoft.ML.OnnxTransformTest/DnnImageFeaturizerTest.cs b/test/Microsoft.ML.OnnxTransformTest/DnnImageFeaturizerTest.cs index 11580d262f..84841a37aa 100644 --- a/test/Microsoft.ML.OnnxTransformTest/DnnImageFeaturizerTest.cs +++ b/test/Microsoft.ML.OnnxTransformTest/DnnImageFeaturizerTest.cs @@ -9,10 +9,10 @@ using Microsoft.ML; using Microsoft.ML.Core.Data; using Microsoft.ML.Data; -using Microsoft.ML.ImageAnalytics; using Microsoft.ML.Model; using Microsoft.ML.OnnxTransform.StaticPipe; using Microsoft.ML.RunTests; +using Microsoft.ML.StaticPipe; using Microsoft.ML.Transforms; using Xunit; using Xunit.Abstractions; @@ -107,7 +107,7 @@ public void OnnxStatic() var dataFile = GetDataPath("images/images.tsv"); var imageFolder = Path.GetDirectoryName(dataFile); - var data = TextLoader.CreateReader(env, ctx => ( + var data = TextLoaderStatic.CreateReader(env, ctx => ( imagePath: ctx.LoadText(0), name: ctx.LoadText(1))) .Read(dataFile); diff --git a/test/Microsoft.ML.OnnxTransformTest/Microsoft.ML.OnnxTransformTest.csproj b/test/Microsoft.ML.OnnxTransformTest/Microsoft.ML.OnnxTransformTest.csproj index 32a0d50b43..78a8727ee8 100644 --- a/test/Microsoft.ML.OnnxTransformTest/Microsoft.ML.OnnxTransformTest.csproj +++ b/test/Microsoft.ML.OnnxTransformTest/Microsoft.ML.OnnxTransformTest.csproj @@ -6,6 +6,7 @@ + diff --git a/test/Microsoft.ML.OnnxTransformTest/OnnxTransformTests.cs b/test/Microsoft.ML.OnnxTransformTest/OnnxTransformTests.cs index 34d5c40b46..0954396c15 100644 --- a/test/Microsoft.ML.OnnxTransformTest/OnnxTransformTests.cs +++ b/test/Microsoft.ML.OnnxTransformTest/OnnxTransformTests.cs @@ -10,10 +10,10 @@ using Microsoft.ML; using Microsoft.ML.Core.Data; using Microsoft.ML.Data; -using Microsoft.ML.ImageAnalytics; using Microsoft.ML.Model; using Microsoft.ML.OnnxTransform.StaticPipe; using Microsoft.ML.RunTests; +using Microsoft.ML.StaticPipe; using Microsoft.ML.Tools; using Microsoft.ML.Transforms; using Xunit; @@ -126,8 +126,10 @@ void TestSimpleCase() catch (InvalidOperationException) { } } - [ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // x86 fails with "An attempt was made to load a program with an incorrect format." - void TestOldSavingAndLoading() + [ConditionalTheory(typeof(Environment), nameof(Environment.Is64BitProcess))] // x86 fails with "An attempt was made to load a program with an incorrect format." + [InlineData(null, false)] + [InlineData(null, true)] + void TestOldSavingAndLoading(int? 
gpuDeviceId, bool fallbackToCpu) { if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) return; @@ -146,7 +148,7 @@ void TestOldSavingAndLoading() var inputNames = new[] { "data_0" }; var outputNames = new[] { "softmaxout_1" }; - var est = new OnnxScoringEstimator(Env, modelFile, inputNames, outputNames); + var est = new OnnxScoringEstimator(Env, modelFile, inputNames, outputNames, gpuDeviceId, fallbackToCpu); var transformer = est.Fit(dataView); var result = transformer.Transform(dataView); var resultRoles = new RoleMappedData(result); @@ -198,7 +200,7 @@ public void OnnxStatic() var dataFile = GetDataPath("images/images.tsv"); var imageFolder = Path.GetDirectoryName(dataFile); - var data = TextLoader.CreateReader(env, ctx => ( + var data = TextLoaderStatic.CreateReader(env, ctx => ( imagePath: ctx.LoadText(0), name: ctx.LoadText(1))) .Read(dataFile); @@ -236,7 +238,7 @@ void TestCommandLine() return; var env = new MLContext(); - var x = Maml.Main(new[] { @"showschema loader=Text{col=data_0:R4:0-150527} xf=Onnx{InputColumns={data_0} OutputColumns={softmaxout_1} model={squeezenet/00000001/model.onnx}}" }); + var x = Maml.Main(new[] { @"showschema loader=Text{col=data_0:R4:0-150527} xf=Onnx{InputColumns={data_0} OutputColumns={softmaxout_1} model={squeezenet/00000001/model.onnx} GpuDeviceId=0 FallbackToCpu=+}" }); Assert.Equal(0, x); } diff --git a/test/Microsoft.ML.Predictor.Tests/Microsoft.ML.Predictor.Tests.csproj b/test/Microsoft.ML.Predictor.Tests/Microsoft.ML.Predictor.Tests.csproj index 8f1cc4be39..50abe5ee21 100644 --- a/test/Microsoft.ML.Predictor.Tests/Microsoft.ML.Predictor.Tests.csproj +++ b/test/Microsoft.ML.Predictor.Tests/Microsoft.ML.Predictor.Tests.csproj @@ -12,7 +12,6 @@ - diff --git a/test/Microsoft.ML.StaticPipelineTesting/ImageAnalyticsTests.cs b/test/Microsoft.ML.StaticPipelineTesting/ImageAnalyticsTests.cs index cc2764d0f7..0f9158a453 100644 --- a/test/Microsoft.ML.StaticPipelineTesting/ImageAnalyticsTests.cs +++ b/test/Microsoft.ML.StaticPipelineTesting/ImageAnalyticsTests.cs @@ -3,7 +3,7 @@ // See the LICENSE file in the project root for more information. 
using Microsoft.ML.Data; -using Microsoft.ML.ImageAnalytics; +using Microsoft.ML.StaticPipe; using Xunit; using Xunit.Abstractions; @@ -21,7 +21,7 @@ public void SimpleImageSmokeTest() { var env = new MLContext(0); - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, ctx => ctx.LoadText(0).LoadAsImage().AsGrayscale().Resize(10, 8).ExtractPixels()); var schema = reader.AsDynamic.GetOutputSchema(); @@ -35,7 +35,7 @@ public void SimpleImageSmokeTest() Assert.Equal(8, vecType.Dimensions[1]); Assert.Equal(10, vecType.Dimensions[2]); - var readAsImage = TextLoader.CreateReader(env, + var readAsImage = TextLoaderStatic.CreateReader(env, ctx => ctx.LoadText(0).LoadAsImage()); var est = readAsImage.MakeNewEstimator().Append(r => r.AsGrayscale().Resize(10, 8).ExtractPixels()); var pipe= readAsImage.Append(est); diff --git a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs index 9061dbaa09..2782fa339c 100644 --- a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs +++ b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs @@ -63,7 +63,7 @@ public void SimpleTextLoaderCopyColumnsTest() + "1 1 2 4 15"; var dataSource = new BytesStreamSource(data); - var text = TextLoader.CreateReader(env, ctx => ( + var text = TextLoaderStatic.CreateReader(env, ctx => ( label: ctx.LoadBool(0), text: ctx.LoadText(1), numericFeatures: ctx.LoadFloat(2, null)), // If fit correctly, this ought to be equivalent to max of 4, that is, length of 3. @@ -172,7 +172,7 @@ void Helper(Schema thisSchema, string name, ColumnType expected) Assert.Equal(expected, thisSchema[thisCol].Type); } - var text = TextLoader.CreateReader(env, ctx => ( + var text = TextLoaderStatic.CreateReader(env, ctx => ( yo: new Obnoxious1(ctx.LoadText(0), ctx.LoadFloat(1, 5)), dawg: new Obnoxious2() { Biz = ctx.LoadText(2), Blam = ctx.LoadDouble(1, 2) }, how: MakeObnoxious3(ctx.LoadBool(2), new Obnoxious1(ctx.LoadText(0), ctx.LoadFloat(1, 4)), @@ -365,7 +365,7 @@ public void Normalizer() var dataPath = GetDataPath("generated_regression_dataset.csv"); var dataSource = new MultiFileSource(dataPath); - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)), separator: ';', hasHeader: true); var data = reader.Read(dataSource); @@ -390,7 +390,7 @@ public void NormalizerWithOnFit() var dataPath = GetDataPath("generated_regression_dataset.csv"); var dataSource = new MultiFileSource(dataPath); - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, c => c.LoadFloat(0, 2), separator: ';', hasHeader: true); var data = reader.Read(dataSource); @@ -432,7 +432,7 @@ public void ToKey() { var env = new MLContext(0); var dataPath = GetDataPath("iris.data"); - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, c => (label: c.LoadText(4), values: c.LoadFloat(0, 3)), separator: ','); var dataSource = new MultiFileSource(dataPath); @@ -470,7 +470,7 @@ public void ConcatWith() { var env = new MLContext(0); var dataPath = GetDataPath("iris.data"); - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, c => (label: c.LoadText(4), values: c.LoadFloat(0, 3), value: c.LoadFloat(2)), separator: ','); var dataSource = new MultiFileSource(dataPath); @@ -508,7 +508,7 @@ public void Tokenize() { var env = new MLContext(0); var dataPath = 
GetDataPath("wikipedia-detox-250-line-data.tsv"); - var reader = TextLoader.CreateReader(env, ctx => ( + var reader = TextLoaderStatic.CreateReader(env, ctx => ( label: ctx.LoadBool(0), text: ctx.LoadText(1)), hasHeader: true); var dataSource = new MultiFileSource(dataPath); @@ -535,7 +535,7 @@ public void NormalizeTextAndRemoveStopWords() { var env = new MLContext(0); var dataPath = GetDataPath("wikipedia-detox-250-line-data.tsv"); - var reader = TextLoader.CreateReader(env, ctx => ( + var reader = TextLoaderStatic.CreateReader(env, ctx => ( label: ctx.LoadBool(0), text: ctx.LoadText(1)), hasHeader: true); var dataSource = new MultiFileSource(dataPath); @@ -564,7 +564,7 @@ public void ConvertToWordBag() { var env = new MLContext(0); var dataPath = GetDataPath("wikipedia-detox-250-line-data.tsv"); - var reader = TextLoader.CreateReader(env, ctx => ( + var reader = TextLoaderStatic.CreateReader(env, ctx => ( label: ctx.LoadBool(0), text: ctx.LoadText(1)), hasHeader: true); var dataSource = new MultiFileSource(dataPath); @@ -593,7 +593,7 @@ public void Ngrams() { var env = new MLContext(0); var dataPath = GetDataPath("wikipedia-detox-250-line-data.tsv"); - var reader = TextLoader.CreateReader(env, ctx => ( + var reader = TextLoaderStatic.CreateReader(env, ctx => ( label: ctx.LoadBool(0), text: ctx.LoadText(1)), hasHeader: true); var dataSource = new MultiFileSource(dataPath); @@ -625,7 +625,7 @@ public void LpGcNormAndWhitening() var dataPath = GetDataPath("generated_regression_dataset.csv"); var dataSource = new MultiFileSource(dataPath); - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)), separator: ';', hasHeader: true); var data = reader.Read(dataSource); @@ -661,7 +661,7 @@ public void LdaTopicModel() { var env = new MLContext(0); var dataPath = GetDataPath("wikipedia-detox-250-line-data.tsv"); - var reader = TextLoader.CreateReader(env, ctx => ( + var reader = TextLoaderStatic.CreateReader(env, ctx => ( label: ctx.LoadBool(0), text: ctx.LoadText(1)), hasHeader: true); var dataSource = new MultiFileSource(dataPath); @@ -689,7 +689,7 @@ public void FeatureSelection() { var env = new MLContext(0); var dataPath = GetDataPath("wikipedia-detox-250-line-data.tsv"); - var reader = TextLoader.CreateReader(env, ctx => ( + var reader = TextLoaderStatic.CreateReader(env, ctx => ( label: ctx.LoadBool(0), text: ctx.LoadText(1)), hasHeader: true); var dataSource = new MultiFileSource(dataPath); @@ -722,7 +722,7 @@ public void TrainTestSplit() var ctx = new BinaryClassificationContext(env); - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, c => (label: c.LoadFloat(0), features: c.LoadFloat(1, 4))); var data = reader.Read(dataSource); @@ -750,7 +750,7 @@ public void PrincipalComponentAnalysis() var dataPath = GetDataPath("generated_regression_dataset.csv"); var dataSource = new MultiFileSource(dataPath); - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)), separator: ';', hasHeader: true); var data = reader.Read(dataSource); @@ -772,7 +772,7 @@ public void NAIndicatorStatic() var env = new MLContext(0); string dataPath = GetDataPath("breast-cancer.txt"); - var reader = TextLoader.CreateReader(env, ctx => ( + var reader = TextLoaderStatic.CreateReader(env, ctx => ( ScalarFloat: ctx.LoadFloat(1), ScalarDouble: ctx.LoadDouble(1), VectorFloat: ctx.LoadFloat(1, 
4), @@ -815,7 +815,7 @@ public void TextNormalizeStatic() { var env = new MLContext(0); var dataPath = GetDataPath("wikipedia-detox-250-line-data.tsv"); - var reader = TextLoader.CreateReader(env, ctx => ( + var reader = TextLoaderStatic.CreateReader(env, ctx => ( label: ctx.LoadBool(0), text: ctx.LoadText(1)), hasHeader: true); var dataSource = new MultiFileSource(dataPath); @@ -844,7 +844,7 @@ public void TestPcaStatic() { var env = new MLContext(0); var dataSource = GetDataPath("generated_regression_dataset.csv"); - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)), separator: ';', hasHeader: true); var data = reader.Read(dataSource); diff --git a/test/Microsoft.ML.StaticPipelineTesting/Training.cs b/test/Microsoft.ML.StaticPipelineTesting/Training.cs index e1ed6e9810..db5c472fcc 100644 --- a/test/Microsoft.ML.StaticPipelineTesting/Training.cs +++ b/test/Microsoft.ML.StaticPipelineTesting/Training.cs @@ -38,7 +38,7 @@ public void SdcaRegression() var ctx = new RegressionContext(env); - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)), separator: ';', hasHeader: true); @@ -81,7 +81,7 @@ public void SdcaRegressionNameCollision() var ctx = new RegressionContext(env); // Here we introduce another column called "Score" to collide with the name of the default output. Heh heh heh... - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10), Score: c.LoadText(2)), separator: ';', hasHeader: true); @@ -110,7 +110,7 @@ public void SdcaBinaryClassification() var dataSource = new MultiFileSource(dataPath); var ctx = new BinaryClassificationContext(env); - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, c => (label: c.LoadBool(0), features: c.LoadFloat(1, 9))); LinearBinaryModelParameters pred = null; @@ -156,7 +156,7 @@ public void SdcaBinaryClassificationNoCalibration() var dataSource = new MultiFileSource(dataPath); var ctx = new BinaryClassificationContext(env); - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, c => (label: c.LoadBool(0), features: c.LoadFloat(1, 9))); LinearBinaryModelParameters pred = null; @@ -200,7 +200,7 @@ public void AveragePerceptronNoCalibration() var dataSource = new MultiFileSource(dataPath); var ctx = new BinaryClassificationContext(env); - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, c => (label: c.LoadBool(0), features: c.LoadFloat(1, 9))); LinearBinaryModelParameters pred = null; @@ -236,7 +236,7 @@ public void AveragePerceptronCalibration() var dataSource = new MultiFileSource(dataPath); var ctx = new BinaryClassificationContext(env); - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, c => (label: c.LoadBool(0), features: c.LoadFloat(1, 9))); LinearBinaryModelParameters pred = null; @@ -272,7 +272,7 @@ public void FfmBinaryClassification() var dataSource = new MultiFileSource(dataPath); var ctx = new BinaryClassificationContext(env); - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, c => (label: c.LoadBool(0), features1: c.LoadFloat(1, 4), features2: c.LoadFloat(5, 9))); FieldAwareFactorizationMachineModelParameters pred = null; @@ -304,7 +304,7 @@ 
public void SdcaMulticlass() var dataSource = new MultiFileSource(dataPath); var ctx = new MulticlassClassificationContext(env); - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, c => (label: c.LoadText(0), features: c.LoadFloat(1, 4))); MulticlassLogisticRegressionModelParameters pred = null; @@ -354,7 +354,7 @@ public void CrossValidate() var dataSource = new MultiFileSource(dataPath); var ctx = new MulticlassClassificationContext(env); - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, c => (label: c.LoadText(0), features: c.LoadFloat(1, 4))); var est = reader.MakeNewEstimator() @@ -378,7 +378,7 @@ public void FastTreeBinaryClassification() var dataSource = new MultiFileSource(dataPath); var ctx = new BinaryClassificationContext(env); - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, c => (label: c.LoadBool(0), features: c.LoadFloat(1, 9))); IPredictorWithFeatureWeights pred = null; @@ -418,7 +418,7 @@ public void FastTreeRegression() var ctx = new RegressionContext(env); - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)), separator: ';', hasHeader: true); @@ -459,7 +459,7 @@ public void LightGbmBinaryClassification() var dataSource = new MultiFileSource(dataPath); var ctx = new BinaryClassificationContext(env); - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, c => (label: c.LoadBool(0), features: c.LoadFloat(1, 9))); IPredictorWithFeatureWeights pred = null; @@ -500,7 +500,7 @@ public void LightGbmRegression() var ctx = new RegressionContext(env); - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)), separator: ';', hasHeader: true); @@ -542,7 +542,7 @@ public void PoissonRegression() var ctx = new RegressionContext(env); - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)), separator: ';', hasHeader: true); @@ -584,7 +584,7 @@ public void LogisticRegressionBinaryClassification() var dataSource = new MultiFileSource(dataPath); var ctx = new BinaryClassificationContext(env); - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, c => (label: c.LoadBool(0), features: c.LoadFloat(1, 9))); IPredictorWithFeatureWeights pred = null; @@ -623,7 +623,7 @@ public void MulticlassLogisticRegression() var dataSource = new MultiFileSource(dataPath); var ctx = new MulticlassClassificationContext(env); - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, c => (label: c.LoadText(0), features: c.LoadFloat(1, 4))); MulticlassLogisticRegressionModelParameters pred = null; @@ -668,7 +668,7 @@ public void OnlineGradientDescent() var ctx = new RegressionContext(env); - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)), separator: ';', hasHeader: true); @@ -709,7 +709,7 @@ public void KMeans() var dataPath = GetDataPath(TestDatasets.iris.trainFilename); var dataSource = new MultiFileSource(dataPath); - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, c => (label: c.LoadText(0), features: c.LoadFloat(1, 4))); 
KMeansModelParameters pred = null; @@ -766,7 +766,7 @@ public void FastTreeRanking() var ctx = new RankingContext(env); - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, c => (label: c.LoadFloat(0), features: c.LoadFloat(9, 14), groupId: c.LoadText(1)), separator: '\t', hasHeader: true); @@ -807,7 +807,7 @@ public void LightGBMRanking() var ctx = new RankingContext(env); - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, c => (label: c.LoadFloat(0), features: c.LoadFloat(9, 14), groupId: c.LoadText(1)), separator: '\t', hasHeader: true); @@ -847,7 +847,7 @@ public void MultiClassLightGBM() var dataSource = new MultiFileSource(dataPath); var ctx = new MulticlassClassificationContext(env); - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, c => (label: c.LoadText(0), features: c.LoadFloat(1, 4))); OvaModelParameters pred = null; @@ -885,7 +885,7 @@ public void MultiClassNaiveBayesTrainer() var dataSource = new MultiFileSource(dataPath); var ctx = new MulticlassClassificationContext(env); - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, c => (label: c.LoadText(0), features: c.LoadFloat(1, 4))); MultiClassNaiveBayesModelParameters pred = null; @@ -930,7 +930,7 @@ public void HogwildSGDBinaryClassification() var dataSource = new MultiFileSource(dataPath); var ctx = new BinaryClassificationContext(env); - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, c => (label: c.LoadBool(0), features: c.LoadFloat(1, 9))); IPredictorWithFeatureWeights pred = null; diff --git a/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipeBase.cs b/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipeBase.cs index a9bef25f93..1f6781c274 100644 --- a/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipeBase.cs +++ b/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipeBase.cs @@ -617,7 +617,7 @@ protected bool CheckMetadataNames(string kind, int size, Schema sch1, Schema sch Fail("Different {0} metadata types: {0} vs {1}", kind, t1, t2); return Failed(); } - if (!t1.ItemType.IsText) + if (!(t1.ItemType is TextType)) { if (!mustBeText) { diff --git a/test/Microsoft.ML.TestFramework/EnvironmentExtensions.cs b/test/Microsoft.ML.TestFramework/EnvironmentExtensions.cs index 0302b6c2a4..1b8cbdcb60 100644 --- a/test/Microsoft.ML.TestFramework/EnvironmentExtensions.cs +++ b/test/Microsoft.ML.TestFramework/EnvironmentExtensions.cs @@ -25,9 +25,6 @@ public static TEnvironment AddStandardComponents(this TEnvironment env.ComponentCatalog.RegisterAssembly(typeof(EnsembleModelParameters).Assembly); // ML.Ensemble env.ComponentCatalog.RegisterAssembly(typeof(KMeansModelParameters).Assembly); // ML.KMeansClustering env.ComponentCatalog.RegisterAssembly(typeof(PcaModelParameters).Assembly); // ML.PCA -#pragma warning disable 612 - env.ComponentCatalog.RegisterAssembly(typeof(Experiment).Assembly); // ML.Legacy -#pragma warning restore 612 env.ComponentCatalog.RegisterAssembly(typeof(CVSplit).Assembly); // ML.EntryPoints return env; } diff --git a/test/Microsoft.ML.TestFramework/Microsoft.ML.TestFramework.csproj b/test/Microsoft.ML.TestFramework/Microsoft.ML.TestFramework.csproj index 62ae1cea6a..3854fc5f67 100644 --- a/test/Microsoft.ML.TestFramework/Microsoft.ML.TestFramework.csproj +++ b/test/Microsoft.ML.TestFramework/Microsoft.ML.TestFramework.csproj @@ -15,7 +15,6 @@ - diff --git 
a/test/Microsoft.ML.TestFramework/ModelHelper.cs b/test/Microsoft.ML.TestFramework/ModelHelper.cs deleted file mode 100644 index 3f6811185e..0000000000 --- a/test/Microsoft.ML.TestFramework/ModelHelper.cs +++ /dev/null @@ -1,282 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System.IO; -using Microsoft.ML.Data; -using Microsoft.ML.EntryPoints; -using Microsoft.ML.Legacy.Data; - -namespace Microsoft.ML.TestFramework -{ -#pragma warning disable 612, 618 - public static class ModelHelper - { - private static MLContext s_environment = new MLContext(seed: 1); - private static TransformModel s_housePriceModel; - - public static void WriteKcHousePriceModel(string dataPath, string outputModelPath) - { - if (File.Exists(outputModelPath)) - { - File.Delete(outputModelPath); - } - - using (var saveStream = File.OpenWrite(outputModelPath)) - { - WriteKcHousePriceModel(dataPath, saveStream); - } - } - - public static void WriteKcHousePriceModel(string dataPath, Stream stream) - { - if (s_housePriceModel == null) - { - s_housePriceModel = CreateKcHousePricePredictorModel(dataPath); - } - s_housePriceModel.Save(s_environment, stream); - } - - public static IDataView GetKcHouseDataView(string dataPath) - { - return s_environment.Data.ReadFromTextFile(dataPath, - columns: new[] - { - new Data.TextLoader.Column("Id", Data.DataKind.TX, 0), - new Data.TextLoader.Column("Date", Data.DataKind.TX, 1), - new Data.TextLoader.Column("Label", Data.DataKind.R4, 2), - new Data.TextLoader.Column("BedRooms", Data.DataKind.R4, 3), - new Data.TextLoader.Column("BathRooms", Data.DataKind.R4, 4), - new Data.TextLoader.Column("SqftLiving", Data.DataKind.R4, 5), - new Data.TextLoader.Column("SqftLot", Data.DataKind.R4, 6), - new Data.TextLoader.Column("Floors", Data.DataKind.R4, 7), - new Data.TextLoader.Column("WaterFront", Data.DataKind.R4, 8), - new Data.TextLoader.Column("View", Data.DataKind.R4, 9), - new Data.TextLoader.Column("Condition", Data.DataKind.R4, 10), - new Data.TextLoader.Column("Grade", Data.DataKind.R4, 11), - new Data.TextLoader.Column("SqftAbove", Data.DataKind.R4, 12), - new Data.TextLoader.Column("SqftBasement", Data.DataKind.R4, 13), - new Data.TextLoader.Column("YearBuilt", Data.DataKind.R4, 14), - new Data.TextLoader.Column("YearRenovated", Data.DataKind.R4, 15), - new Data.TextLoader.Column("Zipcode", Data.DataKind.R4, 16), - new Data.TextLoader.Column("Lat", Data.DataKind.R4, 17), - new Data.TextLoader.Column("Long", Data.DataKind.R4, 18), - new Data.TextLoader.Column("SqftLiving15", Data.DataKind.R4, 19), - new Data.TextLoader.Column("SqftLot15", Data.DataKind.R4, 20) - }, - hasHeader: true, - separatorChar: ',' - ); - } - - private static TransformModel CreateKcHousePricePredictorModel(string dataPath) - { - Experiment experiment = s_environment.CreateExperiment(); - var importData = new Legacy.Data.TextLoader(dataPath) - { - Arguments = new TextLoaderArguments - { - Separator = new[] { ',' }, - HasHeader = true, - Column = new[] - { - new TextLoaderColumn() - { - Name = "Id", - Source = new [] { new TextLoaderRange(0) }, - Type = Legacy.Data.DataKind.Text - }, - - new TextLoaderColumn() - { - Name = "Date", - Source = new [] { new TextLoaderRange(1) }, - Type = Legacy.Data.DataKind.Text - }, - - new TextLoaderColumn() - { - Name = "Label", - Source = new [] { new TextLoaderRange(2) }, - Type = Legacy.Data.DataKind.Num - 
}, - - new TextLoaderColumn() - { - Name = "Bedrooms", - Source = new [] { new TextLoaderRange(3) }, - Type = Legacy.Data.DataKind.Num - }, - - new TextLoaderColumn() - { - Name = "Bathrooms", - Source = new [] { new TextLoaderRange(4) }, - Type = Legacy.Data.DataKind.Num - }, - - new TextLoaderColumn() - { - Name = "SqftLiving", - Source = new [] { new TextLoaderRange(5) }, - Type = Legacy.Data.DataKind.Num - }, - - new TextLoaderColumn() - { - Name = "SqftLot", - Source = new [] { new TextLoaderRange(6) }, - Type = Legacy.Data.DataKind.Num - }, - - new TextLoaderColumn() - { - Name = "Floors", - Source = new [] { new TextLoaderRange(7) }, - Type = Legacy.Data.DataKind.Num - }, - - new TextLoaderColumn() - { - Name = "Waterfront", - Source = new [] { new TextLoaderRange(8) }, - Type = Legacy.Data.DataKind.Num - }, - - new TextLoaderColumn() - { - Name = "View", - Source = new [] { new TextLoaderRange(9) }, - Type = Legacy.Data.DataKind.Num - }, - - new TextLoaderColumn() - { - Name = "Condition", - Source = new [] { new TextLoaderRange(10) }, - Type = Legacy.Data.DataKind.Num - }, - - new TextLoaderColumn() - { - Name = "Grade", - Source = new [] { new TextLoaderRange(11) }, - Type = Legacy.Data.DataKind.Num - }, - - new TextLoaderColumn() - { - Name = "SqftAbove", - Source = new [] { new TextLoaderRange(12) }, - Type = Legacy.Data.DataKind.Num - }, - - new TextLoaderColumn() - { - Name = "SqftBasement", - Source = new [] { new TextLoaderRange(13) }, - Type = Legacy.Data.DataKind.Num - }, - - new TextLoaderColumn() - { - Name = "YearBuilt", - Source = new [] { new TextLoaderRange(14) }, - Type = Legacy.Data.DataKind.Num - }, - - new TextLoaderColumn() - { - Name = "YearRenovated", - Source = new [] { new TextLoaderRange(15) }, - Type = Legacy.Data.DataKind.Num - }, - - new TextLoaderColumn() - { - Name = "Zipcode", - Source = new [] { new TextLoaderRange(16) }, - Type = Legacy.Data.DataKind.Num - }, - - new TextLoaderColumn() - { - Name = "Lat", - Source = new [] { new TextLoaderRange(17) }, - Type = Legacy.Data.DataKind.Num - }, - - new TextLoaderColumn() - { - Name = "Long", - Source = new [] { new TextLoaderRange(18) }, - Type = Legacy.Data.DataKind.Num - }, - - new TextLoaderColumn() - { - Name = "SqftLiving15", - Source = new [] { new TextLoaderRange(19) }, - Type = Legacy.Data.DataKind.Num - }, - - new TextLoaderColumn() - { - Name = "SqftLot15", - Source = new [] { new TextLoaderRange(20) }, - Type = Legacy.Data.DataKind.Num - }, - } - } - - //new Data.CustomTextLoader(); - // importData.CustomSchema = dataSchema; - // - }; - - Legacy.Data.TextLoader.Output imported = experiment.Add(importData); - var numericalConcatenate = new Legacy.Transforms.ColumnConcatenator(); - numericalConcatenate.Data = imported.Data; - numericalConcatenate.AddColumn("NumericalFeatures", "SqftLiving", "SqftLot", "SqftAbove", "SqftBasement", "Lat", "Long", "SqftLiving15", "SqftLot15"); - Legacy.Transforms.ColumnConcatenator.Output numericalConcatenated = experiment.Add(numericalConcatenate); - - var categoryConcatenate = new Legacy.Transforms.ColumnConcatenator(); - categoryConcatenate.Data = numericalConcatenated.OutputData; - categoryConcatenate.AddColumn("CategoryFeatures", "Bedrooms", "Bathrooms", "Floors", "Waterfront", "View", "Condition", "Grade", "YearBuilt", "YearRenovated", "Zipcode"); - Legacy.Transforms.ColumnConcatenator.Output categoryConcatenated = experiment.Add(categoryConcatenate); - - var categorize = new Legacy.Transforms.CategoricalOneHotVectorizer(); - 
categorize.AddColumn("CategoryFeatures"); - categorize.Data = categoryConcatenated.OutputData; - Legacy.Transforms.CategoricalOneHotVectorizer.Output categorized = experiment.Add(categorize); - - var featuresConcatenate = new Legacy.Transforms.ColumnConcatenator(); - featuresConcatenate.Data = categorized.OutputData; - featuresConcatenate.AddColumn("Features", "NumericalFeatures", "CategoryFeatures"); - Legacy.Transforms.ColumnConcatenator.Output featuresConcatenated = experiment.Add(featuresConcatenate); - - var learner = new Legacy.Trainers.StochasticDualCoordinateAscentRegressor(); - learner.TrainingData = featuresConcatenated.OutputData; - learner.NumThreads = 1; - Legacy.Trainers.StochasticDualCoordinateAscentRegressor.Output learnerOutput = experiment.Add(learner); - - var combineModels = new Legacy.Transforms.ManyHeterogeneousModelCombiner(); - combineModels.TransformModels = new ArrayVar(numericalConcatenated.Model, categoryConcatenated.Model, categorized.Model, featuresConcatenated.Model); - combineModels.PredictorModel = learnerOutput.PredictorModel; - Legacy.Transforms.ManyHeterogeneousModelCombiner.Output combinedModels = experiment.Add(combineModels); - - var scorer = new Legacy.Transforms.Scorer - { - PredictorModel = combinedModels.PredictorModel - }; - - var scorerOutput = experiment.Add(scorer); - experiment.Compile(); - experiment.SetInput(importData.InputFile, new SimpleFileHandle(s_environment, dataPath, false, false)); - experiment.Run(); - - return experiment.GetOutput(scorerOutput.ScoringTransform); - } - } -#pragma warning restore 612, 618 -} diff --git a/test/Microsoft.ML.Tests/CSharpCodeGen.cs b/test/Microsoft.ML.Tests/CSharpCodeGen.cs deleted file mode 100644 index cd52060cb5..0000000000 --- a/test/Microsoft.ML.Tests/CSharpCodeGen.cs +++ /dev/null @@ -1,47 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System.IO; -using Microsoft.ML.RunTests; -using Xunit; -using Xunit.Abstractions; - -namespace Microsoft.ML.Tests -{ - public class CSharpCodeGen : BaseTestBaseline - { - public CSharpCodeGen(ITestOutputHelper output) : base(output) - { - } - - [Fact(Skip = "Execute this test if you want to regenerate CSharpApi file")] - public void RegenerateCSharpApi() - { - var basePath = GetDataPath("../../src/Microsoft.ML.Legacy/CSharpApi.cs"); - Tools.Maml.Main(new[] { $"? generator=cs{{csFilename={basePath}}}" }); - } - - [ConditionalFact(typeof(BaseTestBaseline), nameof(LessThanNetCore30OrNotNetCore))] - public void TestGeneratedCSharpAPI() - { - var dataPath = GetOutputPath("Api.cs"); - Tools.Maml.Main(new[] { $"? 
generator=cs{{csFilename={dataPath}}}" }); - - var basePath = GetDataPath("../../src/Microsoft.ML.Legacy/CSharpApi.cs"); - using (StreamReader baseline = OpenReader(basePath)) - using (StreamReader result = OpenReader(dataPath)) - { - for (; ; ) - { - string line1 = baseline.ReadLine(); - string line2 = result.ReadLine(); - - if (line1 == null && line2 == null) - break; - Assert.Equal(line1, line2); - } - } - } - } -} diff --git a/test/Microsoft.ML.Tests/CachingTests.cs b/test/Microsoft.ML.Tests/CachingTests.cs index c0dffd8c4f..5997663f2f 100644 --- a/test/Microsoft.ML.Tests/CachingTests.cs +++ b/test/Microsoft.ML.Tests/CachingTests.cs @@ -6,6 +6,7 @@ using System.Threading; using Microsoft.ML.Data; using Microsoft.ML.RunTests; +using Microsoft.ML.StaticPipe; using Xunit; using Xunit.Abstractions; @@ -84,7 +85,7 @@ public void StaticDataCacheTest() var dataPath = GetDataPath(TestDatasets.breastCancer.trainFilename); var dataSource = new MultiFileSource(dataPath); - var reader = TextLoader.CreateReader(env, + var reader = TextLoaderStatic.CreateReader(env, c => (label: c.LoadBool(0), features: c.LoadFloat(1, 9))); var data = reader.Read(dataSource); diff --git a/test/Microsoft.ML.Tests/CollectionDataSourceTests.cs b/test/Microsoft.ML.Tests/CollectionsDataViewTest.cs similarity index 70% rename from test/Microsoft.ML.Tests/CollectionDataSourceTests.cs rename to test/Microsoft.ML.Tests/CollectionsDataViewTest.cs index 1003620157..1861596cc9 100644 --- a/test/Microsoft.ML.Tests/CollectionDataSourceTests.cs +++ b/test/Microsoft.ML.Tests/CollectionsDataViewTest.cs @@ -7,251 +7,19 @@ using System.Linq; using System.Reflection; using Microsoft.ML.Data; -using Microsoft.ML.Legacy.Data; -using Microsoft.ML.Legacy.Trainers; -using Microsoft.ML.Legacy.Transforms; using Microsoft.ML.TestFramework; using Xunit; using Xunit.Abstractions; namespace Microsoft.ML.EntryPoints.Tests { -#pragma warning disable 612 - public class CollectionDataSourceTests : BaseTestClass + public class CollectionsDataViewTest : BaseTestClass { - public CollectionDataSourceTests(ITestOutputHelper output) + public CollectionsDataViewTest(ITestOutputHelper output) : base(output) { } - [Fact] - public void CheckConstructor() - { - Assert.NotNull(CollectionDataSource.Create(new List() { new Input { Number1 = 1, String1 = "1" } })); - Assert.NotNull(CollectionDataSource.Create(new Input[1] { new Input { Number1 = 1, String1 = "1" } })); - Assert.NotNull(CollectionDataSource.Create(new Input[1] { new Input { Number1 = 1, String1 = "1" } }.AsEnumerable())); - - bool thrown = false; - try - { - CollectionDataSource.Create(new List()); - } - catch - { - thrown = true; - } - Assert.True(thrown); - - thrown = false; - try - { - CollectionDataSource.Create(new Input[0]); - } - catch - { - thrown = true; - } - Assert.True(thrown); - } - - [Fact] - public void CanSuccessfullyApplyATransform() - { - var collection = CollectionDataSource.Create(new List() { new Input { Number1 = 1, String1 = "1" } }); - var environment = new MLContext(); - Experiment experiment = environment.CreateExperiment(); - Legacy.ILearningPipelineDataStep output = (Legacy.ILearningPipelineDataStep)collection.ApplyStep(null, experiment); - - Assert.NotNull(output.Data); - Assert.NotNull(output.Data.VarName); - Assert.Null(output.Model); - } - - [Fact] - public void CanSuccessfullyEnumerated() - { - var collection = CollectionDataSource.Create(new List() { - new Input { Number1 = 1, String1 = "1" }, - new Input { Number1 = 2, String1 = "2" }, - new Input { Number1 = 3, 
String1 = "3" } - }); - - var environment = new MLContext(); - Experiment experiment = environment.CreateExperiment(); - Legacy.ILearningPipelineDataStep output = collection.ApplyStep(null, experiment) as Legacy.ILearningPipelineDataStep; - - experiment.Compile(); - collection.SetInput(environment, experiment); - experiment.Run(); - - IDataView data = experiment.GetOutput(output.Data); - Assert.NotNull(data); - - using (var cursor = data.GetRowCursor((a => true))) - { - var IDGetter = cursor.GetGetter(0); - var TextGetter = cursor.GetGetter>(1); - - Assert.True(cursor.MoveNext()); - - float ID = 0; - IDGetter(ref ID); - Assert.Equal(1, ID); - - ReadOnlyMemory Text = new ReadOnlyMemory(); - TextGetter(ref Text); - Assert.Equal("1", Text.ToString()); - - Assert.True(cursor.MoveNext()); - - ID = 0; - IDGetter(ref ID); - Assert.Equal(2, ID); - - Text = new ReadOnlyMemory(); - TextGetter(ref Text); - Assert.Equal("2", Text.ToString()); - - Assert.True(cursor.MoveNext()); - - ID = 0; - IDGetter(ref ID); - Assert.Equal(3, ID); - - Text = new ReadOnlyMemory(); - TextGetter(ref Text); - Assert.Equal("3", Text.ToString()); - - Assert.False(cursor.MoveNext()); - } - } - - [Fact] - public void CanTrain() - { - var pipeline = new Legacy.LearningPipeline(); - var data = new List() { - new IrisData { SepalLength = 1f, SepalWidth = 1f, PetalLength=0.3f, PetalWidth=5.1f, Label=1}, - new IrisData { SepalLength = 1f, SepalWidth = 1f, PetalLength=0.3f, PetalWidth=5.1f, Label=1}, - new IrisData { SepalLength = 1.2f, SepalWidth = 0.5f, PetalLength=0.3f, PetalWidth=5.1f, Label=0} - }; - var collection = CollectionDataSource.Create(data); - - pipeline.Add(collection); - pipeline.Add(new ColumnConcatenator(outputColumn: "Features", - "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")); - pipeline.Add(new StochasticDualCoordinateAscentClassifier()); - var model = pipeline.Train(); - - IrisPrediction prediction = model.Predict(new IrisData() - { - SepalLength = 3.3f, - SepalWidth = 1.6f, - PetalLength = 0.2f, - PetalWidth = 5.1f, - }); - - pipeline = new Legacy.LearningPipeline(); - collection = CollectionDataSource.Create(data.AsEnumerable()); - pipeline.Add(collection); - pipeline.Add(new ColumnConcatenator(outputColumn: "Features", - "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")); - pipeline.Add(new StochasticDualCoordinateAscentClassifier()); - model = pipeline.Train(); - - prediction = model.Predict(new IrisData() - { - SepalLength = 3.3f, - SepalWidth = 1.6f, - PetalLength = 0.2f, - PetalWidth = 5.1f, - }); - - } - - [Fact] - public void CanTrainProperties() - { - var pipeline = new Legacy.LearningPipeline(); - var data = new List() { - new IrisData { SepalLength = 1f, SepalWidth = 1f, PetalLength=0.3f, PetalWidth=5.1f, Label=1}, - new IrisData { SepalLength = 1f, SepalWidth = 1f, PetalLength=0.3f, PetalWidth=5.1f, Label=1}, - new IrisData { SepalLength = 1.2f, SepalWidth = 0.5f, PetalLength=0.3f, PetalWidth=5.1f, Label=0} - }; - var collection = CollectionDataSource.Create(data); - - pipeline.Add(collection); - pipeline.Add(new ColumnConcatenator(outputColumn: "Features", - "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")); - pipeline.Add(new StochasticDualCoordinateAscentClassifier()); - var model = pipeline.Train(); - - IrisPredictionProperties prediction = model.Predict(new IrisData - { - SepalLength = 3.3f, - SepalWidth = 1.6f, - PetalLength = 0.2f, - PetalWidth = 5.1f, - }); - - pipeline = new Legacy.LearningPipeline(); - collection = 
CollectionDataSource.Create(data.AsEnumerable()); - pipeline.Add(collection); - pipeline.Add(new ColumnConcatenator(outputColumn: "Features", - "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")); - pipeline.Add(new StochasticDualCoordinateAscentClassifier()); - model = pipeline.Train(); - - prediction = model.Predict(new IrisData - { - SepalLength = 3.3f, - SepalWidth = 1.6f, - PetalLength = 0.2f, - PetalWidth = 5.1f, - }); - - } - - public class Input - { - [LoadColumn(0)] - public float Number1; - - [LoadColumn(1)] - public string String1; - } - - public class IrisData - { - [LoadColumn(0)] - public float Label; - - [LoadColumn(1)] - public float SepalLength; - - [LoadColumn(2)] - public float SepalWidth; - - [LoadColumn(3)] - public float PetalLength; - - [LoadColumn(4)] - public float PetalWidth; - } - - public class IrisPrediction - { - [ColumnName("Score")] - public float[] PredictedLabels; - } - - public class IrisPredictionProperties - { - private float[] _PredictedLabels; - [ColumnName("Score")] - public float[] PredictedLabels { get { return _PredictedLabels; } set { _PredictedLabels = value; } } - } - public class ConversionSimpleClass { public int fInt; @@ -723,5 +491,4 @@ public void PrivateGetSetProperties() } } } -#pragma warning restore 612 } diff --git a/test/Microsoft.ML.Tests/FeatureContributionTests.cs b/test/Microsoft.ML.Tests/FeatureContributionTests.cs index c3396bdd44..e41cf6954d 100644 --- a/test/Microsoft.ML.Tests/FeatureContributionTests.cs +++ b/test/Microsoft.ML.Tests/FeatureContributionTests.cs @@ -30,9 +30,9 @@ public void FeatureContributionEstimatorWorkout() var estPipe = new FeatureContributionCalculatingEstimator(ML, model.Model, model.FeatureColumn) .Append(new FeatureContributionCalculatingEstimator(ML, model.Model, model.FeatureColumn, normalize: false)) - .Append(new FeatureContributionCalculatingEstimator(ML, model.Model, model.FeatureColumn, top: 0)) - .Append(new FeatureContributionCalculatingEstimator(ML, model.Model, model.FeatureColumn, bottom: 0)) - .Append(new FeatureContributionCalculatingEstimator(ML, model.Model, model.FeatureColumn, top: 0, bottom: 0)); + .Append(new FeatureContributionCalculatingEstimator(ML, model.Model, model.FeatureColumn, numPositiveContributions: 0)) + .Append(new FeatureContributionCalculatingEstimator(ML, model.Model, model.FeatureColumn, numNegativeContributions: 0)) + .Append(new FeatureContributionCalculatingEstimator(ML, model.Model, model.FeatureColumn, numPositiveContributions: 0, numNegativeContributions: 0)); TestEstimatorCore(estPipe, data); Done(); @@ -181,10 +181,10 @@ private void TestFeatureContribution( Assert.NotNull(predictor); // Calculate feature contributions. 
- var est = new FeatureContributionCalculatingEstimator(ML, predictor, "Features", top: 3, bottom: 0) - .Append(new FeatureContributionCalculatingEstimator(ML, predictor, "Features", top: 0, bottom: 3)) - .Append(new FeatureContributionCalculatingEstimator(ML, predictor, "Features", top: 1, bottom: 1)) - .Append(new FeatureContributionCalculatingEstimator(ML, predictor, "Features", top: 1, bottom: 1, normalize: false)); + var est = new FeatureContributionCalculatingEstimator(ML, predictor, "Features", numPositiveContributions: 3, numNegativeContributions: 0) + .Append(new FeatureContributionCalculatingEstimator(ML, predictor, "Features", numPositiveContributions: 0, numNegativeContributions: 3)) + .Append(new FeatureContributionCalculatingEstimator(ML, predictor, "Features", numPositiveContributions: 1, numNegativeContributions: 1)) + .Append(new FeatureContributionCalculatingEstimator(ML, predictor, "Features", numPositiveContributions: 1, numNegativeContributions: 1, normalize: false)); TestEstimatorCore(est, data); // Verify output. diff --git a/test/Microsoft.ML.Tests/LearningPipelineTests.cs b/test/Microsoft.ML.Tests/LearningPipelineTests.cs deleted file mode 100644 index 1d69649fd2..0000000000 --- a/test/Microsoft.ML.Tests/LearningPipelineTests.cs +++ /dev/null @@ -1,158 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System.Linq; -using Microsoft.ML.Data; -using Microsoft.ML.Legacy.Data; -using Microsoft.ML.Legacy.Trainers; -using Microsoft.ML.Legacy.Transforms; -using Microsoft.ML.TestFramework; -using Xunit; -using Xunit.Abstractions; - -namespace Microsoft.ML.EntryPoints.Tests -{ -#pragma warning disable 612, 618 - public class LearningPipelineTests : BaseTestClass - { - public LearningPipelineTests(ITestOutputHelper output) - : base(output) - { - } - - [Fact] - public void ConstructorDoesntThrow() - { - Assert.NotNull(new Legacy.LearningPipeline()); - } - - [Fact] - public void CanAddAndRemoveFromPipeline() - { - var pipeline = new Legacy.LearningPipeline() - { - new CategoricalOneHotVectorizer("String1", "String2"), - new ColumnConcatenator(outputColumn: "Features", "String1", "String2", "Number1", "Number2"), - new StochasticDualCoordinateAscentRegressor() - }; - Assert.NotNull(pipeline); - Assert.Equal(3, pipeline.Count); - - pipeline.Remove(pipeline.ElementAt(2)); - Assert.Equal(2, pipeline.Count); - - pipeline.Add(new Legacy.Trainers.StochasticDualCoordinateAscentRegressor()); - Assert.Equal(3, pipeline.Count); - } - - private class InputData - { - [LoadColumn(columnIndex: 1)] - public string F1; - } - - private class TransformedData - { -#pragma warning disable 649 - [ColumnName("F1")] - public float[] TransformedF1; -#pragma warning restore 649 - } - - [Fact] - public void TransformOnlyPipeline() - { - const string _dataPath = @"..\..\Data\breast-cancer.txt"; - var pipeline = new Legacy.LearningPipeline(seed: 1, conc: 1); - pipeline.Add(new ML.Legacy.Data.TextLoader(_dataPath).CreateFrom(useHeader: false)); - pipeline.Add(new CategoricalHashOneHotVectorizer("F1") { HashBits = 10, Seed = 314489979, OutputKind = OneHotEncodingTransformerOutputKind.Bag }); - var model = pipeline.Train(); - var predictionModel = model.Predict(new InputData() { F1 = "5" }); - - Assert.NotNull(predictionModel); - Assert.NotNull(predictionModel.TransformedF1); - Assert.Equal(1024, predictionModel.TransformedF1.Length); - - 
for (int index = 0; index < 1024; index++) - if (index == 265) - Assert.Equal(1, predictionModel.TransformedF1[index]); - else - Assert.Equal(0, predictionModel.TransformedF1[index]); - } - - public class Data - { - [ColumnName("Features")] - [VectorType(2)] - public float[] Features; - - [ColumnName("Label")] - public float Label; - } - - public class Prediction - { - [ColumnName("PredictedLabel")] - public bool PredictedLabel; - } - - [Fact] - public void NoTransformPipeline() - { - var data = new Data[1]; - data[0] = new Data - { - Features = new float[] { 0.0f, 1.0f }, - Label = 0f - }; - var pipeline = new Legacy.LearningPipeline(); - pipeline.Add(CollectionDataSource.Create(data)); - pipeline.Add(new FastForestBinaryClassifier()); - var model = pipeline.Train(); - } - - public class BooleanLabelData - { - [ColumnName("Features")] - [VectorType(2)] - public float[] Features; - - [ColumnName("Label")] - public bool Label; - } - - [Fact] - public void BooleanLabelPipeline() - { - var data = new BooleanLabelData[1]; - data[0] = new BooleanLabelData - { - Features = new float[] { 0.0f, 1.0f }, - Label = false - }; - var pipeline = new Legacy.LearningPipeline(); - pipeline.Add(CollectionDataSource.Create(data)); - pipeline.Add(new FastForestBinaryClassifier()); - var model = pipeline.Train(); - } - - [Fact] - public void AppendPipeline() - { - var pipeline = new Legacy.LearningPipeline(); - pipeline.Append(new CategoricalOneHotVectorizer("String1", "String2")) - .Append(new ColumnConcatenator(outputColumn: "Features", "String1", "String2", "Number1", "Number2")) - .Append(new StochasticDualCoordinateAscentRegressor()); - Assert.NotNull(pipeline); - Assert.Equal(3, pipeline.Count); - - pipeline.Remove(pipeline.ElementAt(2)); - Assert.Equal(2, pipeline.Count); - - pipeline.Append(new StochasticDualCoordinateAscentRegressor()); - Assert.Equal(3, pipeline.Count); - } - } -#pragma warning restore 612, 618 -} diff --git a/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj b/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj index 716138ce8f..52b5283a52 100644 --- a/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj +++ b/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj @@ -14,6 +14,7 @@ + @@ -22,7 +23,6 @@ - diff --git a/test/Microsoft.ML.Tests/OnnxConversionTest.cs b/test/Microsoft.ML.Tests/OnnxConversionTest.cs new file mode 100644 index 0000000000..c03098ac28 --- /dev/null +++ b/test/Microsoft.ML.Tests/OnnxConversionTest.cs @@ -0,0 +1,557 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Runtime.InteropServices; +using System.Text.RegularExpressions; +using Google.Protobuf; +using Microsoft.ML.Data; +using Microsoft.ML.Model.Onnx; +using Microsoft.ML.RunTests; +using Microsoft.ML.Tools; +using Microsoft.ML.Transforms; +using Microsoft.ML.UniversalModelFormat.Onnx; +using Newtonsoft.Json; +using Xunit; +using Xunit.Abstractions; + +namespace Microsoft.ML.Tests +{ + public class OnnxConversionTest : BaseTestBaseline + { + private class AdultData + { + [LoadColumn(0, 10), ColumnName("FeatureVector")] + public float Features { get; set; } + + [LoadColumn(11)] + public float Target { get; set; } + } + + public OnnxConversionTest(ITestOutputHelper output) : base(output) + { + } + + /// + /// In this test, we convert a trained into ONNX file and then + /// call to evaluate that file. The outputs of are checked against the original + /// ML.NET model's outputs. + /// + [Fact] + public void SimpleEndToEndOnnxConversionTest() + { + // Step 1: Create and train a ML.NET pipeline. + var trainDataPath = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename); + var mlContext = new MLContext(seed: 1, conc: 1); + var data = mlContext.Data.ReadFromTextFile(trainDataPath, + hasHeader: true, + separatorChar: ';' + ); + var cachedTrainData = mlContext.Data.Cache(data); + var dynamicPipeline = + mlContext.Transforms.Normalize("FeatureVector") + .AppendCacheCheckpoint(mlContext) + .Append(mlContext.Regression.Trainers.StochasticDualCoordinateAscent(labelColumn: "Target", featureColumn: "FeatureVector")); + var model = dynamicPipeline.Fit(data); + var transformedData = model.Transform(data); + + // Step 2: Convert ML.NET model to ONNX format and save it as a file. + var onnxModel = mlContext.Model.ConvertToOnnx(model, data); + var onnxFileName = "model.onnx"; + var onnxModelPath = GetOutputPath(onnxFileName); + SaveOnnxModel(onnxModel, onnxModelPath, null); + + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows) && Environment.Is64BitProcess) + { + // Step 3: Evaluate the saved ONNX model using the data used to train the ML.NET pipeline. + string[] inputNames = onnxModel.Graph.Input.Select(valueInfoProto => valueInfoProto.Name).ToArray(); + string[] outputNames = onnxModel.Graph.Output.Select(valueInfoProto => valueInfoProto.Name).ToArray(); + var onnxEstimator = new OnnxScoringEstimator(mlContext, onnxModelPath, inputNames, outputNames); + var onnxTransformer = onnxEstimator.Fit(data); + var onnxResult = onnxTransformer.Transform(data); + + // Step 4: Compare ONNX and ML.NET results. + CompareSelectedR4ScalarColumns("Score", "Score0", transformedData, onnxResult, 1); + } + + // Step 5: Check ONNX model's text format. This test will be not necessary if Step 3 and Step 4 can run on Linux and + // Mac to support cross-platform tests. 
+ var subDir = Path.Combine("..", "..", "BaselineOutput", "Common", "Onnx", "Regression", "Adult"); + var onnxTextName = "SimplePipeline.txt"; + var onnxTextPath = GetOutputPath(subDir, onnxTextName); + SaveOnnxModel(onnxModel, null, onnxTextPath); + CheckEquality(subDir, onnxTextName, digitsOfPrecision: 3); + + Done(); + } + + private class BreastCancerFeatureVector + { + [LoadColumn(1, 9), VectorType(9)] + public float[] Features; + } + + private class BreastCancerCatFeatureExample + { + [LoadColumn(0)] + public bool Label; + + [LoadColumn(1)] + public float F1; + + [LoadColumn(2)] + public string F2; + } + + private class BreastCancerMulticlassExample + { + [LoadColumn(1)] + public string Label; + + [LoadColumn(2, 9), VectorType(8)] + public float[] Features; + } + + [Fact] + public void KmeansOnnxConversionTest() + { + // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. + var mlContext = new MLContext(seed: 1, conc: 1); + + string dataPath = GetDataPath("breast-cancer.txt"); + // Now read the file (remember though, readers are lazy, so the actual reading will happen when the data is accessed). + var data = mlContext.Data.ReadFromTextFile(dataPath, + hasHeader: true, + separatorChar: '\t'); + + var pipeline = mlContext.Transforms.Normalize("Features"). + Append(mlContext.Clustering.Trainers.KMeans(features: "Features", advancedSettings: settings => + { + settings.MaxIterations = 1; + settings.K = 4; + settings.NumThreads = 1; + settings.InitAlgorithm = Trainers.KMeans.KMeansPlusPlusTrainer.InitAlgorithm.Random; + })); + + var model = pipeline.Fit(data); + var transformedData = model.Transform(data); + + var onnxModel = mlContext.Model.ConvertToOnnx(model, data); + + // Compare results produced by ML.NET and ONNX's runtime. + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows) && Environment.Is64BitProcess) + { + var onnxFileName = "model.onnx"; + var onnxModelPath = GetOutputPath(onnxFileName); + SaveOnnxModel(onnxModel, onnxModelPath, null); + + // Evaluate the saved ONNX model using the data used to train the ML.NET pipeline. + string[] inputNames = onnxModel.Graph.Input.Select(valueInfoProto => valueInfoProto.Name).ToArray(); + string[] outputNames = onnxModel.Graph.Output.Select(valueInfoProto => valueInfoProto.Name).ToArray(); + var onnxEstimator = new OnnxScoringEstimator(mlContext, onnxModelPath, inputNames, outputNames); + var onnxTransformer = onnxEstimator.Fit(data); + var onnxResult = onnxTransformer.Transform(data); + CompareSelectedR4VectorColumns("Score", "Score0", transformedData, onnxResult, 3); + } + + // Check ONNX model's text format. We save the produced ONNX model as a text file and compare it against + // the associated file in ML.NET repo. Such a comparison can be retired if ONNXRuntime ported to ML.NET + // can support Linux and Mac. 
+ var subDir = Path.Combine("..", "..", "BaselineOutput", "Common", "Onnx", "Cluster", "BreastCancer"); + var onnxTextName = "Kmeans.txt"; + var onnxTextPath = GetOutputPath(subDir, onnxTextName); + SaveOnnxModel(onnxModel, null, onnxTextPath); + CheckEquality(subDir, onnxTextName, digitsOfPrecision: 2); + Done(); + } + + [Fact] + void CommandLineOnnxConversionTest() + { + string dataPath = GetDataPath("breast-cancer.txt"); + string modelPath = GetOutputPath("ModelWithLessIO.zip"); + var trainingPathArgs = $"data={dataPath} out={modelPath}"; + var trainingArgs = " loader=text{col=Label:BL:0 col=F1:R4:1-8 col=F2:TX:9} xf=Cat{col=F2} xf=Concat{col=Features:F1,F2} tr=ft{numThreads=1 numLeaves=8 numTrees=3} seed=1"; + Assert.Equal(0, Maml.Main(new[] { "train " + trainingPathArgs + trainingArgs})); + + var subDir = Path.Combine("..", "..", "BaselineOutput", "Common", "Onnx", "BinaryClassification", "BreastCancer"); + var onnxTextName = "ModelWithLessIO.txt"; + var onnxFileName = "ModelWithLessIO.onnx"; + var onnxTextPath = GetOutputPath(subDir, onnxTextName); + var onnxFilePath = GetOutputPath(subDir, onnxFileName); + string conversionCommand = $"saveonnx in={modelPath} onnx={onnxFilePath} json={onnxTextPath} domain=machinelearning.dotnet name=modelWithLessIO inputsToDrop=Label outputsToDrop=F1,F2,Features,Label"; + Assert.Equal(0, Maml.Main(new[] { conversionCommand })); + + var fileText = File.ReadAllText(onnxTextPath); + fileText = Regex.Replace(fileText, "\"producerVersion\": \".*\"", "\"producerVersion\": \"##VERSION##\""); + File.WriteAllText(onnxTextPath, fileText); + + CheckEquality(subDir, onnxTextName); + Done(); + } + + [Fact] + public void KeyToVectorWithBagOnnxConversionTest() + { + var mlContext = new MLContext(seed: 1, conc: 1); + + string dataPath = GetDataPath("breast-cancer.txt"); + + var data = mlContext.Data.ReadFromTextFile(dataPath, + hasHeader: true, + separatorChar: '\t'); + + var pipeline = mlContext.Transforms.Categorical.OneHotEncoding("F2", "F2", Transforms.Categorical.OneHotEncodingTransformer.OutputKind.Bag) + .Append(mlContext.Transforms.ReplaceMissingValues(new MissingValueReplacingTransformer.ColumnInfo("F2"))) + .Append(mlContext.Transforms.Concatenate("Features", "F1", "F2")) + .Append(mlContext.BinaryClassification.Trainers.FastTree(labelColumn: "Label", featureColumn: "Features", numLeaves: 2, numTrees: 1, minDatapointsInLeaves: 2)); + + var model = pipeline.Fit(data); + var onnxModel = mlContext.Model.ConvertToOnnx(model, data); + + // Check ONNX model's text format. We save the produced ONNX model as a text file and compare it against + // the associated file in ML.NET repo. Such a comparison can be retired if ONNXRuntime ported to ML.NET + // can support Linux and Mac. 
+ var subDir = Path.Combine("..", "..", "BaselineOutput", "Common", "Onnx", "BinaryClassification", "BreastCancer"); + var onnxTextName = "OneHotBagPipeline.txt"; + var onnxFileName = "OneHotBagPipeline.onnx"; + var onnxTextPath = GetOutputPath(subDir, onnxTextName); + var onnxFilePath = GetOutputPath(subDir, onnxFileName); + SaveOnnxModel(onnxModel, onnxFilePath, onnxTextPath); + CheckEquality(subDir, onnxTextName); + Done(); + } + + [Fact] + public void InitializerCreationTest() + { + var env = new MLContext(); + // Create the actual implementation + var ctxImpl = new OnnxContextImpl(env, "model", "ML.NET", "0", 0, "com.test", Model.Onnx.OnnxVersion.Stable); + + // Use implementation as in the actual conversion code + var ctx = ctxImpl as OnnxContext; + ctx.AddInitializer(9.4f, "float"); + ctx.AddInitializer(17L, "int64"); + ctx.AddInitializer("36", "string"); + ctx.AddInitializer(new List { 9.4f, 1.7f, 3.6f }, new List { 1, 3 }, "floats"); + ctx.AddInitializer(new List { 94L, 17L, 36L }, new List { 1, 3 }, "int64s"); + ctx.AddInitializer(new List { "94", "17", "36" }, new List { 1, 3 }, "strings"); + + var model = ctxImpl.MakeModel(); + + var floatScalar = model.Graph.Initializer[0]; + Assert.True(floatScalar.Name == "float"); + Assert.True(floatScalar.Dims.Count == 0); + Assert.True(floatScalar.FloatData.Count == 1); + Assert.True(floatScalar.FloatData[0] == 9.4f); + + var int64Scalar = model.Graph.Initializer[1]; + Assert.True(int64Scalar.Name == "int64"); + Assert.True(int64Scalar.Dims.Count == 0); + Assert.True(int64Scalar.Int64Data.Count == 1); + Assert.True(int64Scalar.Int64Data[0] == 17L); + + var stringScalar = model.Graph.Initializer[2]; + Assert.True(stringScalar.Name == "string"); + Assert.True(stringScalar.Dims.Count == 0); + Assert.True(stringScalar.StringData.Count == 1); + Assert.True(stringScalar.StringData[0].ToStringUtf8() == "36"); + + var floatsTensor = model.Graph.Initializer[3]; + Assert.True(floatsTensor.Name == "floats"); + Assert.True(floatsTensor.Dims.Count == 2); + Assert.True(floatsTensor.Dims[0] == 1); + Assert.True(floatsTensor.Dims[1] == 3); + Assert.True(floatsTensor.FloatData.Count == 3); + Assert.True(floatsTensor.FloatData[0] == 9.4f); + Assert.True(floatsTensor.FloatData[1] == 1.7f); + Assert.True(floatsTensor.FloatData[2] == 3.6f); + + var int64sTensor = model.Graph.Initializer[4]; + Assert.True(int64sTensor.Name == "int64s"); + Assert.True(int64sTensor.Dims.Count == 2); + Assert.True(int64sTensor.Dims[0] == 1); + Assert.True(int64sTensor.Dims[1] == 3); + Assert.True(int64sTensor.Int64Data.Count == 3); + Assert.True(int64sTensor.Int64Data[0] == 94L); + Assert.True(int64sTensor.Int64Data[1] == 17L); + Assert.True(int64sTensor.Int64Data[2] == 36L); + + var stringsTensor = model.Graph.Initializer[5]; + Assert.True(stringsTensor.Name == "strings"); + Assert.True(stringsTensor.Dims.Count == 2); + Assert.True(stringsTensor.Dims[0] == 1); + Assert.True(stringsTensor.Dims[1] == 3); + Assert.True(stringsTensor.StringData.Count == 3); + Assert.True(stringsTensor.StringData[0].ToStringUtf8() == "94"); + Assert.True(stringsTensor.StringData[1].ToStringUtf8() == "17"); + Assert.True(stringsTensor.StringData[2].ToStringUtf8() == "36"); + } + + [Fact] + public void LogisticRegressionOnnxConversionTest() + { + // Step 1: Create and train a ML.NET pipeline. 
+ var trainDataPath = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename); + var mlContext = new MLContext(seed: 1, conc: 1); + var data = mlContext.Data.ReadFromTextFile(trainDataPath, + hasHeader: true, + separatorChar: ';' + ); + var cachedTrainData = mlContext.Data.Cache(data); + var dynamicPipeline = + mlContext.Transforms.Normalize("FeatureVector") + .AppendCacheCheckpoint(mlContext) + .Append(mlContext.Regression.Trainers.StochasticDualCoordinateAscent(labelColumn: "Target", featureColumn: "FeatureVector")); + var model = dynamicPipeline.Fit(data); + + // Step 2: Convert ML.NET model to ONNX format and save it as a file. + var onnxModel = mlContext.Model.ConvertToOnnx(model, data); + + // Step 3: Save ONNX model as binary and text files. + var subDir = Path.Combine("..", "..", "BaselineOutput", "Common", "Onnx", "BinaryClassification", "BreastCancer"); + var onnxFileName = "LogisticRegressionSaveModelToOnnxTest.onnx"; + var onnxFilePath = GetOutputPath(subDir, onnxFileName); + var onnxTextName = "LogisticRegressionSaveModelToOnnxTest.txt"; + var onnxTextPath = GetOutputPath(subDir, onnxTextName); + SaveOnnxModel(onnxModel, onnxFilePath, onnxTextPath); + + // Step 4: Check ONNX model's text format. + CheckEquality(subDir, onnxTextName, digitsOfPrecision: 3); + Done(); + } + + [ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // LightGBM is 64-bit only + public void LightGbmBinaryClassificationOnnxConversionTest() + { + // Step 1: Create and train a ML.NET pipeline. + var trainDataPath = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename); + var mlContext = new MLContext(seed: 1, conc: 1); + var data = mlContext.Data.ReadFromTextFile(trainDataPath, + hasHeader: true, + separatorChar: ';' + ); + var cachedTrainData = mlContext.Data.Cache(data); + var dynamicPipeline = + mlContext.Transforms.Normalize("FeatureVector") + .AppendCacheCheckpoint(mlContext) + .Append(mlContext.Regression.Trainers.LightGbm(labelColumn: "Target", featureColumn: "FeatureVector", numBoostRound: 3, numLeaves: 16, minDataPerLeaf: 100)); + var model = dynamicPipeline.Fit(data); + + // Step 2: Convert ML.NET model to ONNX format and save it as a file. + var onnxModel = mlContext.Model.ConvertToOnnx(model, data); + + // Step 3: Save ONNX model as binary and text files. + var subDir = Path.Combine("..", "..", "BaselineOutput", "Common", "Onnx", "BinaryClassification", "BreastCancer"); + var onnxFileName = "LightGbmBinaryClassificationOnnxConversionTest.onnx"; + var onnxFilePath = GetOutputPath(subDir, onnxFileName); + var onnxTextName = "LightGbmBinaryClassificationOnnxConversionTest.txt"; + var onnxTextPath = GetOutputPath(subDir, onnxTextName); + SaveOnnxModel(onnxModel, onnxFilePath, onnxTextPath); + + // Step 4: Check ONNX model's text format. + CheckEquality(subDir, onnxTextName, digitsOfPrecision: 3); + Done(); + } + + [Fact] + public void MulticlassLogisticRegressionOnnxConversionTest() + { + var mlContext = new MLContext(seed: 1, conc: 1); + + string dataPath = GetDataPath("breast-cancer.txt"); + var data = mlContext.Data.ReadFromTextFile(dataPath, + hasHeader: true, + separatorChar: '\t'); + + var pipeline = mlContext.Transforms.Normalize("Features"). + Append(mlContext.Transforms.Conversion.MapValueToKey("Label")). 
+ Append(mlContext.MulticlassClassification.Trainers.LogisticRegression(labelColumn: "Label", featureColumn: "Features",
+ advancedSettings: settings =>
+ {
+ settings.UseThreads = false;
+ }));
+
+ var model = pipeline.Fit(data);
+ var transformedData = model.Transform(data);
+ var onnxModel = mlContext.Model.ConvertToOnnx(model, data);
+
+ var subDir = Path.Combine("..", "..", "BaselineOutput", "Common", "Onnx", "MultiClassClassification", "BreastCancer");
+ var onnxFileName = "MultiClassificationLogisticRegressionSaveModelToOnnxTest.onnx";
+ var onnxFilePath = GetOutputPath(subDir, onnxFileName);
+ var onnxTextName = "MultiClassificationLogisticRegressionSaveModelToOnnxTest.txt";
+ var onnxTextPath = GetOutputPath(subDir, onnxTextName);
+
+ SaveOnnxModel(onnxModel, onnxFilePath, onnxTextPath);
+
+ CheckEquality(subDir, onnxTextName, digitsOfPrecision: 2);
+ Done();
+ }
+
+ [Fact]
+ public void RemoveVariablesInPipelineTest()
+ {
+ var mlContext = new MLContext(seed: 1, conc: 1);
+
+ string dataPath = GetDataPath("breast-cancer.txt");
+ var data = mlContext.Data.ReadFromTextFile<BreastCancerCatFeatureExample>(dataPath,
+ hasHeader: true,
+ separatorChar: '\t');
+
+ var pipeline = mlContext.Transforms.Categorical.OneHotEncoding("F2", "F2", Transforms.Categorical.OneHotEncodingTransformer.OutputKind.Bag)
+ .Append(mlContext.Transforms.ReplaceMissingValues(new MissingValueReplacingTransformer.ColumnInfo("F2")))
+ .Append(mlContext.Transforms.Concatenate("Features", "F1", "F2"))
+ .Append(mlContext.Transforms.Normalize("Features"))
+ .Append(mlContext.BinaryClassification.Trainers.FastTree(labelColumn: "Label", featureColumn: "Features", numLeaves: 2, numTrees: 1, minDatapointsInLeaves: 2));
+
+ var model = pipeline.Fit(data);
+ var transformedData = model.Transform(data);
+
+ var onnxConversionContext = new OnnxContextImpl(mlContext, "A Simple Pipeline", "ML.NET", "0", 0, "machinelearning.dotnet", OnnxVersion.Stable);
+
+ LinkedList<ITransformCanSaveOnnx> transforms = null;
+ using (var conversionChannel = (mlContext as IChannelProvider).Start("ONNX conversion"))
+ {
+ SaveOnnxCommand.GetPipe(onnxConversionContext, conversionChannel, transformedData, out IDataView root, out IDataView sink, out transforms);
+ // Input column names to exclude from the resulting ONNX model.
+ var redundantInputColumnNames = new HashSet<string> { "Label" };
+ // Output column names to exclude from the resulting ONNX model.
+ var redundantOutputColumnNames = new HashSet<string> { "Label", "F1", "F2", "Features" };
+ var onnxModel = SaveOnnxCommand.ConvertTransformListToOnnxModel(onnxConversionContext, conversionChannel, root, sink, transforms,
+ redundantInputColumnNames, redundantOutputColumnNames);
+
+ // Check ONNX model's text format. We save the produced ONNX model as a text file and compare it against
+ // the associated file in ML.NET repo. Such a comparison can be retired if ONNXRuntime ported to ML.NET
+ // can support Linux and Mac.
+ var subDir = Path.Combine("..", "..", "BaselineOutput", "Common", "Onnx", "BinaryClassification", "BreastCancer"); + var onnxTextName = "ExcludeVariablesInOnnxConversion.txt"; + var onnxFileName = "ExcludeVariablesInOnnxConversion.onnx"; + var onnxTextPath = GetOutputPath(subDir, onnxTextName); + var onnxFilePath = GetOutputPath(subDir, onnxFileName); + SaveOnnxModel(onnxModel, onnxFilePath, onnxTextPath); + CheckEquality(subDir, onnxTextName, digitsOfPrecision: 3); + } + Done(); + } + + private class SmallSentimentExample + { + [LoadColumn(0,3), VectorType(4)] + public string[] Tokens; + } + + [Fact] + public void WordEmbeddingsTest() + { + var mlContext = new MLContext(seed: 1, conc: 1); + var dataPath = GetDataPath(@"small-sentiment-test.tsv"); + var embedNetworkPath = GetDataPath(@"shortsentiment.emd"); + var data = mlContext.Data.ReadFromTextFile(dataPath, hasHeader: false, separatorChar: '\t'); + + var pipeline = mlContext.Transforms.Text.ExtractWordEmbeddings("Tokens", embedNetworkPath, "Embed"); + var model = pipeline.Fit(data); + var transformedData = model.Transform(data); + + var subDir = Path.Combine("..", "..", "BaselineOutput", "Common", "Onnx", "Transforms", "Sentiment"); + var onnxTextName = "SmallWordEmbed.txt"; + var onnxFileName = "SmallWordEmbed.onnx"; + var onnxTextPath = GetOutputPath(subDir, onnxTextName); + var onnxFilePath = GetOutputPath(subDir, onnxFileName); + var onnxModel = mlContext.Model.ConvertToOnnx(model, data); + SaveOnnxModel(onnxModel, onnxFilePath, onnxTextPath); + + CheckEquality(subDir, onnxTextName); + Done(); + } + + private void CreateDummyExamplesToMakeComplierHappy() + { + var dummyExample = new BreastCancerFeatureVector() { Features = null }; + var dummyExample1 = new BreastCancerCatFeatureExample() { Label = false, F1 = 0, F2 = "Amy" }; + var dummyExample2 = new BreastCancerMulticlassExample() { Label = "Amy", Features = null }; + var dummyExample3 = new SmallSentimentExample() { Tokens = null }; + } + + private void CompareSelectedR4VectorColumns(string leftColumnName, string rightColumnName, IDataView left, IDataView right, int precision = 6) + { + var leftColumnIndex = left.Schema[leftColumnName].Index; + var rightColumnIndex = right.Schema[rightColumnName].Index; + + using (var expectedCursor = left.GetRowCursor(columnIndex => leftColumnIndex == columnIndex)) + using (var actualCursor = right.GetRowCursor(columnIndex => rightColumnIndex == columnIndex)) + { + VBuffer expected = default; + VBuffer actual = default; + var expectedGetter = expectedCursor.GetGetter>(leftColumnIndex); + var actualGetter = actualCursor.GetGetter>(rightColumnIndex); + while (expectedCursor.MoveNext() && actualCursor.MoveNext()) + { + expectedGetter(ref expected); + actualGetter(ref actual); + + Assert.Equal(expected.Length, actual.Length); + for (int i = 0; i < expected.Length; ++i) + Assert.Equal(expected.GetItemOrDefault(i), actual.GetItemOrDefault(i), precision); + } + } + } + + private void CompareSelectedR4ScalarColumns(string leftColumnName, string rightColumnName, IDataView left, IDataView right, int precision = 6) + { + var leftColumnIndex = left.Schema[leftColumnName].Index; + var rightColumnIndex = right.Schema[rightColumnName].Index; + + using (var expectedCursor = left.GetRowCursor(columnIndex => leftColumnIndex == columnIndex)) + using (var actualCursor = right.GetRowCursor(columnIndex => rightColumnIndex == columnIndex)) + { + float expected = default; + VBuffer actual = default; + var expectedGetter = expectedCursor.GetGetter(leftColumnIndex); + 
var actualGetter = actualCursor.GetGetter<VBuffer<float>>(rightColumnIndex);
+ while (expectedCursor.MoveNext() && actualCursor.MoveNext())
+ {
+ expectedGetter(ref expected);
+ actualGetter(ref actual);
+
+ // A scalar such as R4 (float) is converted to a [1, 1] tensor in ONNX format for consistency when making batch predictions.
+ Assert.Equal(1, actual.Length);
+ Assert.Equal(expected, actual.GetItemOrDefault(0), precision);
+ }
+ }
+ }
+
+ private void SaveOnnxModel(ModelProto model, string binaryFormatPath, string textFormatPath)
+ {
+ DeleteOutputPath(binaryFormatPath); // Clean if such a file exists.
+ DeleteOutputPath(textFormatPath);
+
+ if (binaryFormatPath != null)
+ using (var file = Env.CreateOutputFile(binaryFormatPath))
+ using (var stream = file.CreateWriteStream())
+ model.WriteTo(stream);
+
+ if (textFormatPath != null)
+ {
+ using (var file = Env.CreateOutputFile(textFormatPath))
+ using (var stream = file.CreateWriteStream())
+ using (var writer = new StreamWriter(stream))
+ {
+ var parsedJson = JsonConvert.DeserializeObject(model.ToString());
+ writer.Write(JsonConvert.SerializeObject(parsedJson, Formatting.Indented));
+ }
+
+ // Strip the version information.
+ var fileText = File.ReadAllText(textFormatPath);
+
+ fileText = Regex.Replace(fileText, "\"producerVersion\": \".*\"", "\"producerVersion\": \"##VERSION##\"");
+ File.WriteAllText(textFormatPath, fileText);
+ }
+ }
+ }
+}
diff --git a/test/Microsoft.ML.Tests/OnnxTests.cs b/test/Microsoft.ML.Tests/OnnxTests.cs
deleted file mode 100644
index 117d83ffff..0000000000
--- a/test/Microsoft.ML.Tests/OnnxTests.cs
+++ /dev/null
@@ -1,591 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
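As a quick orientation for reviewers of the new `OnnxConversionTest` suite above, the user-facing flow those tests exercise is: fit a pipeline, call `ConvertToOnnx`, and write out the resulting `ModelProto`. The sketch below illustrates that flow using the 0.9-era calls that appear in the tests; the class name, output file name, and using directives are illustrative assumptions and are not part of this change.

```csharp
// Sketch only (not part of this diff): export a fitted ML.NET pipeline to ONNX.
using System.IO;
using Google.Protobuf;   // for ModelProto.WriteTo
using Microsoft.ML;      // MLContext, ITransformer, IDataView (0.9-era namespaces, approximate)

public static class OnnxExportSketch
{
    public static void ExportToOnnx(MLContext mlContext, ITransformer model, IDataView inputData)
    {
        // Build an ONNX ModelProto from the fitted transformer chain and the input schema,
        // mirroring the mlContext.Model.ConvertToOnnx(model, data) calls in the tests above.
        var onnxModel = mlContext.Model.ConvertToOnnx(model, inputData);

        // Persist the protobuf; the saved model.onnx can then be scored back, e.g. through OnnxScoringEstimator.
        using (var stream = File.Create("model.onnx"))
            onnxModel.WriteTo(stream);
    }
}
```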
- -using System; -using System.Collections.Generic; -using System.IO; -using System.Text.RegularExpressions; -using Microsoft.ML.Data; -using Microsoft.ML.Legacy.Data; -using Microsoft.ML.Legacy.Models; -using Microsoft.ML.Legacy.Trainers; -using Microsoft.ML.Legacy.Transforms; -using Microsoft.ML.Model.Onnx; -using Microsoft.ML.RunTests; -using Xunit; -using Xunit.Abstractions; - -namespace Microsoft.ML.Tests -{ -#pragma warning disable 612, 618 - public class OnnxTests : BaseTestBaseline - { - public OnnxTests(ITestOutputHelper output) : base(output) - { - } - - public class BreastCancerData - { - public float Label; - - public float F1; - public ReadOnlyMemory F2; - } - - public class EmbeddingsData - { - [VectorType(4)] - public string[] Cat; - } - - public class EmbeddingsResult - { - [ColumnName("Cat")] - public float[] Cat; - } - - public class BreastNumericalColumns - { - [VectorType(9)] - public float[] Features; - } - - public class BreastCancerDataAllColumns - { - public float Label; - - [VectorType(9)] - public float[] Features; - } - - public class BreastCancerPrediction - { - [ColumnName("PredictedLabel")] - public bool Cancerous; - } - - public class BreastCancerMCPrediction - { - [ColumnName("Score")] - public float[] Scores; - } - - public class BreastCancerClusterPrediction - { - [ColumnName("PredictedLabel")] - public uint NearestCluster; - [ColumnName("Score")] - public float[] Distances; - } - - [Fact] - public void InitializerCreationTest() - { - var env = new MLContext(); - // Create the actual implementation - var ctxImpl = new OnnxContextImpl(env, "model", "ML.NET", "0", 0, "com.test", Model.Onnx.OnnxVersion.Stable); - - // Use implementation as in the actual conversion code - var ctx = ctxImpl as OnnxContext; - ctx.AddInitializer(9.4f, "float"); - ctx.AddInitializer(17L, "int64"); - ctx.AddInitializer("36", "string"); - ctx.AddInitializer(new List { 9.4f, 1.7f, 3.6f }, new List { 1, 3 }, "floats"); - ctx.AddInitializer(new List { 94L, 17L, 36L }, new List { 1, 3 }, "int64s"); - ctx.AddInitializer(new List { "94", "17", "36" }, new List { 1, 3 }, "strings"); - - var model = ctxImpl.MakeModel(); - - var floatScalar = model.Graph.Initializer[0]; - Assert.True(floatScalar.Name == "float"); - Assert.True(floatScalar.Dims.Count == 0); - Assert.True(floatScalar.FloatData.Count == 1); - Assert.True(floatScalar.FloatData[0] == 9.4f); - - var int64Scalar = model.Graph.Initializer[1]; - Assert.True(int64Scalar.Name == "int64"); - Assert.True(int64Scalar.Dims.Count == 0); - Assert.True(int64Scalar.Int64Data.Count == 1); - Assert.True(int64Scalar.Int64Data[0] == 17L); - - var stringScalar = model.Graph.Initializer[2]; - Assert.True(stringScalar.Name == "string"); - Assert.True(stringScalar.Dims.Count == 0); - Assert.True(stringScalar.StringData.Count == 1); - Assert.True(stringScalar.StringData[0].ToStringUtf8() == "36"); - - var floatsTensor = model.Graph.Initializer[3]; - Assert.True(floatsTensor.Name == "floats"); - Assert.True(floatsTensor.Dims.Count == 2); - Assert.True(floatsTensor.Dims[0] == 1); - Assert.True(floatsTensor.Dims[1] == 3); - Assert.True(floatsTensor.FloatData.Count == 3); - Assert.True(floatsTensor.FloatData[0] == 9.4f); - Assert.True(floatsTensor.FloatData[1] == 1.7f); - Assert.True(floatsTensor.FloatData[2] == 3.6f); - - var int64sTensor = model.Graph.Initializer[4]; - Assert.True(int64sTensor.Name == "int64s"); - Assert.True(int64sTensor.Dims.Count == 2); - Assert.True(int64sTensor.Dims[0] == 1); - Assert.True(int64sTensor.Dims[1] == 3); - 
Assert.True(int64sTensor.Int64Data.Count == 3); - Assert.True(int64sTensor.Int64Data[0] == 94L); - Assert.True(int64sTensor.Int64Data[1] == 17L); - Assert.True(int64sTensor.Int64Data[2] == 36L); - - var stringsTensor = model.Graph.Initializer[5]; - Assert.True(stringsTensor.Name == "strings"); - Assert.True(stringsTensor.Dims.Count == 2); - Assert.True(stringsTensor.Dims[0] == 1); - Assert.True(stringsTensor.Dims[1] == 3); - Assert.True(stringsTensor.StringData.Count == 3); - Assert.True(stringsTensor.StringData[0].ToStringUtf8() == "94"); - Assert.True(stringsTensor.StringData[1].ToStringUtf8() == "17"); - Assert.True(stringsTensor.StringData[2].ToStringUtf8() == "36"); - } - - [Fact] - public void BinaryClassificationFastTreeSaveModelToOnnxTest() - { - string dataPath = GetDataPath(@"breast-cancer.txt"); - var pipeline = new Legacy.LearningPipeline(); - - pipeline.Add(new Legacy.Data.TextLoader(dataPath) - { - Arguments = new TextLoaderArguments - { - Separator = new[] { '\t' }, - HasHeader = true, - Column = new[] - { - new TextLoaderColumn() - { - Name = "Label", - Source = new [] { new TextLoaderRange(0) }, - Type = Legacy.Data.DataKind.Num - }, - - new TextLoaderColumn() - { - Name = "F1", - Source = new [] { new TextLoaderRange(1, 1) }, - Type = Legacy.Data.DataKind.Num - }, - - new TextLoaderColumn() - { - Name = "F2", - Source = new [] { new TextLoaderRange(2, 2) }, - Type = Legacy.Data.DataKind.TX - } - } - } - }); - - pipeline.Add(new MissingValueSubstitutor("F1")); - pipeline.Add(new MinMaxNormalizer("F1")); - pipeline.Add(new CategoricalOneHotVectorizer("F2")); - pipeline.Add(new ColumnConcatenator("Features", "F1", "F2")); - pipeline.Add(new FastTreeBinaryClassifier() { NumLeaves = 2, NumTrees = 1, MinDocumentsInLeafs = 2 }); - - var model = pipeline.Train(); - var subDir = Path.Combine("..", "..", "BaselineOutput", "Common", "Onnx", "BinaryClassification", "BreastCancer"); - var onnxPath = GetOutputPath(subDir, "BinaryClassificationFastTreeSaveModelToOnnxTest.onnx"); - DeleteOutputPath(onnxPath); - - var onnxAsJsonPath = GetOutputPath(subDir, "BinaryClassificationFastTreeSaveModelToOnnxTest.json"); - DeleteOutputPath(onnxAsJsonPath); - - OnnxConverter converter = new OnnxConverter() - { - InputsToDrop = new[] { "Label" }, - OutputsToDrop = new[] { "Label", "F1", "F2", "Features" }, - Onnx = onnxPath, - Json = onnxAsJsonPath, - Domain = "Onnx" - }; - - converter.Convert(model); - - // Strip the version. 
- var fileText = File.ReadAllText(onnxAsJsonPath); - fileText = Regex.Replace(fileText, "\"producerVersion\": \"([^\"]+)\"", "\"producerVersion\": \"##VERSION##\""); - File.WriteAllText(onnxAsJsonPath, fileText); - - CheckEquality(subDir, "BinaryClassificationFastTreeSaveModelToOnnxTest.json"); - Done(); - } - - [Fact] - public void KeyToVectorWithBagTest() - { - string dataPath = GetDataPath(@"breast-cancer.txt"); - var pipeline = new Legacy.LearningPipeline(); - - pipeline.Add(new Legacy.Data.TextLoader(dataPath) - { - Arguments = new TextLoaderArguments - { - Separator = new[] { '\t' }, - HasHeader = true, - Column = new[] - { - new TextLoaderColumn() - { - Name = "Label", - Source = new [] { new TextLoaderRange(0) }, - Type = Legacy.Data.DataKind.Num - }, - - new TextLoaderColumn() - { - Name = "F1", - Source = new [] { new TextLoaderRange(1, 1) }, - Type = Legacy.Data.DataKind.Num - }, - - new TextLoaderColumn() - { - Name = "F2", - Source = new [] { new TextLoaderRange(2, 2) }, - Type = Legacy.Data.DataKind.TX - } - } - } - }); - - var vectorizer = new CategoricalOneHotVectorizer(); - var categoricalColumn = new OneHotEncodingTransformerColumn() - { - OutputKind = OneHotEncodingTransformerOutputKind.Bag, - Name = "F2", - Source = "F2" - }; - vectorizer.Column = new OneHotEncodingTransformerColumn[1] { categoricalColumn }; - pipeline.Add(vectorizer); - pipeline.Add(new ColumnConcatenator("Features", "F1", "F2")); - pipeline.Add(new FastTreeBinaryClassifier() { NumLeaves = 2, NumTrees = 1, MinDocumentsInLeafs = 2 }); - - var model = pipeline.Train(); - var subDir = Path.Combine("..", "..", "BaselineOutput", "Common", "Onnx", "BinaryClassification", "BreastCancer"); - var onnxPath = GetOutputPath(subDir, "KeyToVectorBag.onnx"); - DeleteOutputPath(onnxPath); - - var onnxAsJsonPath = GetOutputPath(subDir, "KeyToVectorBag.json"); - DeleteOutputPath(onnxAsJsonPath); - - OnnxConverter converter = new OnnxConverter() - { - InputsToDrop = new[] { "Label" }, - OutputsToDrop = new[] { "Label", "F1", "F2", "Features" }, - Onnx = onnxPath, - Json = onnxAsJsonPath, - Domain = "Onnx" - }; - - converter.Convert(model); - - // Strip the version. 
- var fileText = File.ReadAllText(onnxAsJsonPath); - fileText = Regex.Replace(fileText, "\"producerVersion\": \"([^\"]+)\"", "\"producerVersion\": \"##VERSION##\""); - File.WriteAllText(onnxAsJsonPath, fileText); - - CheckEquality(subDir, "KeyToVectorBag.json"); - Done(); - } - - [Fact] - public void WordEmbeddingsTest() - { - string dataPath = GetDataPath(@"small-sentiment-test.tsv"); - var pipeline = new Legacy.LearningPipeline(0); - - pipeline.Add(new Legacy.Data.TextLoader(dataPath) - { - Arguments = new TextLoaderArguments - { - Separator = new[] { '\t' }, - HasHeader = false, - Column = new[] - { - new TextLoaderColumn() - { - Name = "Cat", - Source = new [] { new TextLoaderRange(0, 3) }, - Type = Legacy.Data.DataKind.TX - }, - } - } - }); - - var modelPath = GetDataPath(@"shortsentiment.emd"); - var embed = new WordEmbeddings() { CustomLookupTable = modelPath }; - embed.AddColumn("Cat", "Cat"); - pipeline.Add(embed); - var model = pipeline.Train(); - - var subDir = Path.Combine("..", "..", "BaselineOutput", "Common", "Onnx", "WordEmbeddings"); - var onnxPath = GetOutputPath(subDir, "WordEmbeddings.onnx"); - DeleteOutputPath(onnxPath); - - var onnxAsJsonPath = GetOutputPath(subDir, "WordEmbeddings.json"); - DeleteOutputPath(onnxAsJsonPath); - - OnnxConverter converter = new OnnxConverter() - { - Onnx = onnxPath, - Json = onnxAsJsonPath, - Domain = "Onnx" - }; - - converter.Convert(model); - - var fileText = File.ReadAllText(onnxAsJsonPath); - fileText = Regex.Replace(fileText, "\"producerVersion\": \"([^\"]+)\"", "\"producerVersion\": \"##VERSION##\""); - File.WriteAllText(onnxAsJsonPath, fileText); - - CheckEquality(subDir, "WordEmbeddings.json"); - Done(); - } - - [ConditionalFact(typeof(BaseTestBaseline), nameof(BaseTestBaseline.LessThanNetCore30OrNotNetCore))] // netcore3.0 differs from Baseline - public void KmeansTest() - { - string dataPath = GetDataPath(@"breast-cancer.txt"); - var pipeline = new Legacy.LearningPipeline(0); - - pipeline.Add(new Legacy.Data.TextLoader(dataPath) - { - Arguments = new TextLoaderArguments - { - Separator = new[] { '\t' }, - HasHeader = true, - Column = new[] - { - new TextLoaderColumn() - { - Name = "Features", - Source = new [] { new TextLoaderRange(1, 9) }, - Type = Legacy.Data.DataKind.R4 - }, - } - } - }); - - pipeline.Add(new KMeansPlusPlusClusterer() { K = 2, MaxIterations = 1, NumThreads = 1, InitAlgorithm = KMeansPlusPlusTrainerInitAlgorithm.Random }); - var model = pipeline.Train(); - var subDir = Path.Combine("..", "..", "BaselineOutput", "Common", "Onnx", "Cluster", "BreastCancer"); - var onnxPath = GetOutputPath(subDir, "Kmeans.onnx"); - DeleteOutputPath(onnxPath); - - var onnxAsJsonPath = GetOutputPath(subDir, "Kmeans.json"); - DeleteOutputPath(onnxAsJsonPath); - - OnnxConverter converter = new OnnxConverter() - { - Onnx = onnxPath, - Json = onnxAsJsonPath, - Domain = "Onnx" - }; - - converter.Convert(model); - - // Strip the version. 
- var fileText = File.ReadAllText(onnxAsJsonPath); - fileText = Regex.Replace(fileText, "\"producerVersion\": \"([^\"]+)\"", "\"producerVersion\": \"##VERSION##\""); - File.WriteAllText(onnxAsJsonPath, fileText); - - CheckEquality(subDir, "Kmeans.json"); - Done(); - } - - - [ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // LightGBM is 64-bit only - public void BinaryClassificationLightGBMSaveModelToOnnxTest() - { - string dataPath = GetDataPath(@"breast-cancer.txt"); - var pipeline = new Legacy.LearningPipeline(); - - pipeline.Add(new Legacy.Data.TextLoader(dataPath) - { - Arguments = new TextLoaderArguments - { - Separator = new[] { '\t' }, - HasHeader = true, - Column = new[] - { - new TextLoaderColumn() - { - Name = "Label", - Source = new [] { new TextLoaderRange(0) }, - Type = Legacy.Data.DataKind.Num - }, - - new TextLoaderColumn() - { - Name = "Features", - Source = new [] { new TextLoaderRange(1, 9) }, - Type = Legacy.Data.DataKind.Num - } - } - } - }); - - pipeline.Add(new LightGbmBinaryClassifier() { NumLeaves = 2, NumBoostRound = 1, MinDataPerLeaf = 2 }); - - var model = pipeline.Train(); - var subDir = Path.Combine("..", "..", "BaselineOutput", "Common", "Onnx", "BinaryClassification", "BreastCancer"); - var onnxPath = GetOutputPath(subDir, "BinaryClassificationLightGBMSaveModelToOnnxTest.onnx"); - DeleteOutputPath(onnxPath); - - var onnxAsJsonPath = GetOutputPath(subDir, "BinaryClassificationLightGBMSaveModelToOnnxTest.json"); - DeleteOutputPath(onnxAsJsonPath); - - OnnxConverter converter = new OnnxConverter() - { - InputsToDrop = new[] { "Label" }, - OutputsToDrop = new[] { "Label", "Features" }, - Onnx = onnxPath, - Json = onnxAsJsonPath, - Domain = "Onnx" - }; - - converter.Convert(model); - - // Strip the version. - var fileText = File.ReadAllText(onnxAsJsonPath); - fileText = Regex.Replace(fileText, "\"producerVersion\": \"([^\"]+)\"", "\"producerVersion\": \"##VERSION##\""); - File.WriteAllText(onnxAsJsonPath, fileText); - - CheckEquality(subDir, "BinaryClassificationLightGBMSaveModelToOnnxTest.json"); - Done(); - } - - [Fact] - public void BinaryClassificationLRSaveModelToOnnxTest() - { - string dataPath = GetDataPath(@"breast-cancer.txt"); - var pipeline = new Legacy.LearningPipeline(); - - pipeline.Add(new Legacy.Data.TextLoader(dataPath) - { - Arguments = new TextLoaderArguments - { - Separator = new[] { '\t' }, - HasHeader = true, - Column = new[] - { - new TextLoaderColumn() - { - Name = "Label", - Source = new [] { new TextLoaderRange(0) }, - Type = Legacy.Data.DataKind.Num - }, - - new TextLoaderColumn() - { - Name = "Features", - Source = new [] { new TextLoaderRange(1, 9) }, - Type = Legacy.Data.DataKind.Num - } - } - } - }); - - pipeline.Add(new LogisticRegressionBinaryClassifier() { UseThreads = false }); - - var model = pipeline.Train(); - var subDir = Path.Combine("..", "..", "BaselineOutput", "Common", "Onnx", "BinaryClassification", "BreastCancer"); - var onnxPath = GetOutputPath(subDir, "BinaryClassificationLRSaveModelToOnnxTest.onnx"); - DeleteOutputPath(onnxPath); - - var onnxAsJsonPath = GetOutputPath(subDir, "BinaryClassificationLRSaveModelToOnnxTest.json"); - DeleteOutputPath(onnxAsJsonPath); - - OnnxConverter converter = new OnnxConverter() - { - InputsToDrop = new[] { "Label" }, - OutputsToDrop = new[] { "Label", "Features" }, - Onnx = onnxPath, - Json = onnxAsJsonPath, - Domain = "Onnx" - }; - - converter.Convert(model); - - // Strip the version. 
- var fileText = File.ReadAllText(onnxAsJsonPath); - fileText = Regex.Replace(fileText, "\"producerVersion\": \"([^\"]+)\"", "\"producerVersion\": \"##VERSION##\""); - File.WriteAllText(onnxAsJsonPath, fileText); - - CheckEquality(subDir, "BinaryClassificationLRSaveModelToOnnxTest.json", digitsOfPrecision: 3); - Done(); - } - - [Fact] - public void MultiClassificationLRSaveModelToOnnxTest() - { - string dataPath = GetDataPath(@"breast-cancer.txt"); - var pipeline = new Legacy.LearningPipeline(); - - pipeline.Add(new Legacy.Data.TextLoader(dataPath) - { - Arguments = new TextLoaderArguments - { - Separator = new[] { '\t' }, - HasHeader = true, - Column = new[] - { - new TextLoaderColumn() - { - Name = "Label", - Source = new [] { new TextLoaderRange(0) }, - Type = Legacy.Data.DataKind.Num - }, - - new TextLoaderColumn() - { - Name = "Features", - Source = new [] { new TextLoaderRange(1, 9) }, - Type = Legacy.Data.DataKind.Num - } - } - } - }); - - pipeline.Add(new Dictionarizer("Label")); - pipeline.Add(new LogisticRegressionClassifier() { UseThreads = false }); - - var model = pipeline.Train(); - var subDir = Path.Combine("..", "..", "BaselineOutput", "Common", "Onnx", "MultiClassClassification", "BreastCancer"); - var onnxPath = GetOutputPath(subDir, "MultiClassificationLRSaveModelToOnnxTest.onnx"); - DeleteOutputPath(onnxPath); - - var onnxAsJsonPath = GetOutputPath(subDir, "MultiClassificationLRSaveModelToOnnxTest.json"); - DeleteOutputPath(onnxAsJsonPath); - - OnnxConverter converter = new OnnxConverter() - { - InputsToDrop = new[] { "Label" }, - OutputsToDrop = new[] { "Label", "Features" }, - Onnx = onnxPath, - Json = onnxAsJsonPath, - Domain = "Onnx" - }; - - converter.Convert(model); - - // Strip the version. - var fileText = File.ReadAllText(onnxAsJsonPath); - fileText = Regex.Replace(fileText, "\"producerVersion\": \"([^\"]+)\"", "\"producerVersion\": \"##VERSION##\""); - File.WriteAllText(onnxAsJsonPath, fileText); - - CheckEquality(subDir, "MultiClassificationLRSaveModelToOnnxTest.json", digitsOfPrecision: 4); - Done(); - } - - } -#pragma warning restore 612, 618 -} diff --git a/test/Microsoft.ML.Tests/PredictionModelTests.cs b/test/Microsoft.ML.Tests/PredictionModelTests.cs deleted file mode 100644 index 7d8d565445..0000000000 --- a/test/Microsoft.ML.Tests/PredictionModelTests.cs +++ /dev/null @@ -1,116 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System.IO; -using System.Threading.Tasks; -using Microsoft.ML.Data; -using Microsoft.ML.TestFramework; -using Xunit; -using Xunit.Abstractions; - -namespace Microsoft.ML.EntryPoints.Tests -{ -#pragma warning disable 612 - public class PredictionModelTests : BaseTestClass - { - public class HousePriceData - { - public float Bedrooms; - public float Bathrooms; - public float SqftLiving; - public float SqftLot; - public float Floors; - public float Waterfront; - public float View; - public float Condition; - public float Grade; - public float SqftAbove; - public float SqftBasement; - public float YearBuilt; - public float YearRenovated; - public float Zipcode; - public float Lat; - public float Long; - public float SqftLiving15; - public float SqftLot15; - } - - public class HousePricePrediction - { - [ColumnName("Score")] - public float Price; - } - - [Fact(Skip = "Missing data set. 
See https://github.com/dotnet/machinelearning/issues/3")] - public async Task ReadStrongTypeModelFromStream() - { - using (var memoryStream = new MemoryStream()) - { - ModelHelper.WriteKcHousePriceModel(GetDataPath("kc_house_data.csv"), memoryStream); - memoryStream.Position = 0; - - var model = await Legacy.PredictionModel.ReadAsync(memoryStream); - - HousePricePrediction prediction = model.Predict(new HousePriceData() - { - Bedrooms = 3, - Bathrooms = 1.75f, - SqftLiving = 2450, - SqftLot = 2691, - Floors = 2, - Waterfront = 0, - View = 0, - Condition = 3, - Grade = 8, - SqftAbove = 1750, - SqftBasement = 700, - YearBuilt = 1915, - YearRenovated = 0, - Zipcode = 98119, - Lat = 47.6386f, - Long = -122.36f, - SqftLiving15 = 1760, - SqftLot15 = 3573 - }); - - Assert.InRange(prediction.Price, 790_000, 850_000); - - - var dataView = model.Predict(ModelHelper.GetKcHouseDataView(GetDataPath("kc_house_data.csv"))); - dataView.Schema.TryGetColumnIndex("Score", out int scoreColumn); - using (var cursor = dataView.GetRowCursor((int col) => col == scoreColumn)) - { - var scoreGetter = cursor.GetGetter(scoreColumn); - float score = 0; - cursor.MoveNext(); - scoreGetter(ref score); - Assert.InRange(score, 100_000, 200_000); - } - - Legacy.PredictionModel nonGenericModel; - using (var anotherStream = new MemoryStream()) - { - await model.WriteAsync(anotherStream); - nonGenericModel = await Legacy.PredictionModel.ReadAsync(anotherStream); - } - - dataView = nonGenericModel.Predict(ModelHelper.GetKcHouseDataView(GetDataPath("kc_house_data.csv"))); - using (var cursor = dataView.GetRowCursor((int col) => col == scoreColumn)) - { - var scoreGetter = cursor.GetGetter(scoreColumn); - float score = 0; - cursor.MoveNext(); - scoreGetter(ref score); - Assert.InRange(score, 100_000, 200_000); - } - } - } - - public PredictionModelTests(ITestOutputHelper output) - : base(output) - { - } - } -#pragma warning restore 612 -} diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/CrossValidation.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/CrossValidation.cs index b8221188a2..6016cfbdb0 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/CrossValidation.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/CrossValidation.cs @@ -18,7 +18,7 @@ public partial class ApiScenariosTests /// they sometimes want the actual models too.) /// [Fact] - void New_CrossValidation() + void CrossValidation() { var ml = new MLContext(seed: 1, conc: 1); diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/DecomposableTrainAndPredict.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/DecomposableTrainAndPredict.cs index ffdf491876..94f53e65f2 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/DecomposableTrainAndPredict.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/DecomposableTrainAndPredict.cs @@ -22,7 +22,7 @@ public partial class ApiScenariosTests /// say, injecting a dummy label. 
/// [Fact] - void New_DecomposableTrainAndPredict() + void DecomposableTrainAndPredict() { var dataPath = GetDataPath(TestDatasets.irisData.trainFilename); var ml = new MLContext(); diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Evaluation.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Evaluation.cs index 2d26d1ddf0..5d890cf7b8 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Evaluation.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Evaluation.cs @@ -17,7 +17,7 @@ public partial class ApiScenariosTests /// Getting metrics out of this shoudl be as straightforward and unannoying as possible. /// [Fact] - public void New_Evaluation() + public void Evaluation() { var ml = new MLContext(seed: 1, conc: 1); diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Extensibility.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Extensibility.cs index f3f4d9d7c1..84bd6691e9 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Extensibility.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Extensibility.cs @@ -21,7 +21,7 @@ public partial class ApiScenariosTests /// usage of already established components), but should still be possible. /// [Fact] - void New_Extensibility() + void Extensibility() { var dataPath = GetDataPath(TestDatasets.irisData.trainFilename); diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/FileBasedSavingOfData.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/FileBasedSavingOfData.cs index a937b94eae..afae98455c 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/FileBasedSavingOfData.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/FileBasedSavingOfData.cs @@ -20,7 +20,7 @@ public partial class ApiScenariosTests /// and don't necessarily want to transform it every single time.) /// [Fact] - void New_FileBasedSavingOfData() + void FileBasedSavingOfData() { var ml = new MLContext(seed: 1, conc: 1); diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/IntrospectiveTraining.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/IntrospectiveTraining.cs index 5783d8e58b..023cff8d76 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/IntrospectiveTraining.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/IntrospectiveTraining.cs @@ -25,7 +25,7 @@ public partial class ApiScenariosTests /// [Fact] - public void New_IntrospectiveTraining() + public void IntrospectiveTraining() { var ml = new MLContext(seed: 1, conc: 1); var data = ml.Data.ReadFromTextFile(GetDataPath(TestDatasets.Sentiment.trainFilename), hasHeader: true); diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Metacomponents.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Metacomponents.cs index 1845bcf40c..70b6b0bbb5 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Metacomponents.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Metacomponents.cs @@ -19,7 +19,7 @@ public partial class ApiScenariosTests /// If they specify a regression or multi-class classifier ideally that should be a compile error. 
/// [Fact] - public void New_Metacomponents() + public void Metacomponents() { var ml = new MLContext(); var data = ml.Data.ReadFromTextFile(GetDataPath(TestDatasets.irisData.trainFilename), separatorChar: ','); diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/MultithreadedPrediction.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/MultithreadedPrediction.cs index 77e8f76133..e710956462 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/MultithreadedPrediction.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/MultithreadedPrediction.cs @@ -21,7 +21,7 @@ public partial class ApiScenariosTests /// and performant in the new API. /// [Fact] - void New_MultithreadedPrediction() + void MultithreadedPrediction() { var ml = new MLContext(seed: 1, conc: 1); var data = ml.Data.ReadFromTextFile(GetDataPath(TestDatasets.Sentiment.trainFilename), hasHeader: true); diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/ReconfigurablePrediction.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/ReconfigurablePrediction.cs index 1cb4d42350..5b9482ae6e 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/ReconfigurablePrediction.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/ReconfigurablePrediction.cs @@ -17,7 +17,7 @@ public partial class ApiScenariosTests /// with some threshold derived from that. /// [Fact] - public void New_ReconfigurablePrediction() + public void ReconfigurablePrediction() { var ml = new MLContext(seed: 1, conc: 1); var dataReader = ml.Data.ReadFromTextFile(GetDataPath(TestDatasets.Sentiment.trainFilename), hasHeader: true); diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/SimpleTrainAndPredict.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/SimpleTrainAndPredict.cs index 8e9ec0e99b..af2c7ffa99 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/SimpleTrainAndPredict.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/SimpleTrainAndPredict.cs @@ -18,7 +18,7 @@ public partial class ApiScenariosTests /// (for example, the prediction does not happen over a file as it did during training). /// [Fact] - public void New_SimpleTrainAndPredict() + public void SimpleTrainAndPredict() { var ml = new MLContext(seed: 1, conc: 1); var data = ml.Data.ReadFromTextFile(GetDataPath(TestDatasets.Sentiment.trainFilename), hasHeader: true); @@ -45,5 +45,41 @@ public void New_SimpleTrainAndPredict() Assert.True(input.Sentiment && prediction.Score > 1 || !input.Sentiment && prediction.Score < -1); } } + + /// + /// Start with a dataset in a text file. Run text featurization on text values. + /// Train a linear model over that. (I am thinking sentiment classification.) + /// Out of the result, produce some structure over which you can get predictions programmatically + /// (for example, the prediction does not happen over a file as it did during training). + /// Uses Symbolic SDCA Trainer. + /// + [Fact] + public void SimpleTrainAndPredictSymSGD() + { + var ml = new MLContext(seed: 1, conc: 1); + var data = ml.Data.ReadFromTextFile(GetDataPath(TestDatasets.Sentiment.trainFilename), hasHeader: true); + + // Pipeline. + var pipeline = ml.Transforms.Text.FeaturizeText("SentimentText", "Features") + .AppendCacheCheckpoint(ml) + .Append(ml.BinaryClassification.Trainers.SymbolicStochasticGradientDescent("Label", "Features", advancedSettings: s => s.NumberOfThreads = 1)); + + // Train. + var model = pipeline.Fit(data); + + // Create prediction engine and test predictions. 
+ var engine = model.CreatePredictionEngine(ml); + + // Take a couple examples out of the test data and run predictions on top. + var testData = ml.Data.ReadFromTextFile(GetDataPath(TestDatasets.Sentiment.testFilename), hasHeader: true) + .AsEnumerable(ml, false); + foreach (var input in testData.Take(5)) + { + var prediction = engine.Predict(input); + // Verify that predictions match and scores are separated from zero. + Assert.Equal(input.Sentiment, prediction.Sentiment); + Assert.True(input.Sentiment && prediction.Score > 1 || !input.Sentiment && prediction.Score < -1); + } + } } } diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainSaveModelAndPredict.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainSaveModelAndPredict.cs index b322b5f202..d9cfa732a9 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainSaveModelAndPredict.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainSaveModelAndPredict.cs @@ -20,7 +20,7 @@ public partial class ApiScenariosTests /// "communication pipe" is just a serialized model of some form. /// [Fact] - public void New_TrainSaveModelAndPredict() + public void TrainSaveModelAndPredict() { var ml = new MLContext(seed: 1, conc: 1); var data = ml.Data.ReadFromTextFile(GetDataPath(TestDatasets.Sentiment.trainFilename), hasHeader: true); diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainWithInitialPredictor.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainWithInitialPredictor.cs index 307aaebf3c..4954ecea4a 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainWithInitialPredictor.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainWithInitialPredictor.cs @@ -15,7 +15,7 @@ public partial class ApiScenariosTests /// The scenario might be one of the online linear learners that can take advantage of this, for example, averaged perceptron. /// [Fact] - public void New_TrainWithInitialPredictor() + public void TrainWithInitialPredictor() { var ml = new MLContext(seed: 1, conc: 1); diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainWithValidationSet.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainWithValidationSet.cs index 41eb4f91ba..64e0b42587 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainWithValidationSet.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainWithValidationSet.cs @@ -14,7 +14,7 @@ public partial class ApiScenariosTests /// The learner might be trees with early stopping. /// [Fact] - public void New_TrainWithValidationSet() + public void TrainWithValidationSet() { var ml = new MLContext(seed: 1, conc: 1); // Pipeline. diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Visibility.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Visibility.cs index 9c5668ccb1..2ef382e5d5 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Visibility.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Visibility.cs @@ -20,7 +20,7 @@ public partial class ApiScenariosTests /// float vector {3:1, 25:1, 203:1, 511:1}, etc. etc. 
/// [Fact] - void New_Visibility() + void Visibility() { var ml = new MLContext(seed: 1, conc: 1); var pipeline = ml.Data.CreateTextReader(TestDatasets.Sentiment.GetLoaderColumns(), hasHeader: true) diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/TestApi.cs b/test/Microsoft.ML.Tests/Scenarios/Api/TestApi.cs index ec35d9aa2a..5ba77797be 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/TestApi.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/TestApi.cs @@ -231,7 +231,7 @@ public void MetadataSupportInDataViewConstruction() Assert.True(idv.Schema[1].Metadata.Schema.Count == 3); Assert.True(idv.Schema[1].Metadata.Schema[0].Name == kindStringArray); - Assert.True(idv.Schema[1].Metadata.Schema[0].Type.IsVector && idv.Schema[1].Metadata.Schema[0].Type.ItemType.IsText); + Assert.True(idv.Schema[1].Metadata.Schema[0].Type.IsVector && idv.Schema[1].Metadata.Schema[0].Type.ItemType is TextType); Assert.Throws(() => idv.Schema[1].Metadata.Schema[kindFloat]); float retrievedFloat = 0; diff --git a/test/Microsoft.ML.Tests/Scenarios/ClusteringTests.cs b/test/Microsoft.ML.Tests/Scenarios/ClusteringTests.cs index 59ca04fcac..d34c7144c8 100644 --- a/test/Microsoft.ML.Tests/Scenarios/ClusteringTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/ClusteringTests.cs @@ -1,63 +1,16 @@ -using System; +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; using System.Collections.Generic; using Microsoft.ML.Data; -using Microsoft.ML.Legacy.Transforms; using Xunit; namespace Microsoft.ML.Scenarios { -#pragma warning disable 612, 618 public partial class ScenariosTests { - [Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/203")] - public void PredictNewsCluster() - { - string dataPath = GetDataPath(@"external/20newsgroups.txt"); - - var pipeline = new Legacy.LearningPipeline(seed: 1, conc: 1); - pipeline.Add(new Legacy.Data.TextLoader(dataPath).CreateFrom(useHeader: false, allowQuotedStrings: true, supportSparse: false)); - pipeline.Add(new ColumnConcatenator("AllText", "Subject", "Content")); - pipeline.Add(new TextFeaturizer("Features", "AllText") - { - KeepPunctuations = false, - UsePredefinedStopWordRemover = true, - VectorNormalizer = TextFeaturizingEstimatorTextNormKind.L2, - CharFeatureExtractor = new NGramNgramExtractor() { NgramLength = 3, AllLengths = false }, - WordFeatureExtractor = new NGramNgramExtractor() { NgramLength = 1, AllLengths = true } - }); - - pipeline.Add(new Legacy.Trainers.KMeansPlusPlusClusterer() { K = 20 }); - var model = pipeline.Train(); - var gunResult = model.Predict(new NewsData() { Subject = "Let's disscuss gun control", Content = @"The United States has 88.8 guns per 100 people, or about 270,000,000 guns, which is the highest total and per capita number in the world. 22% of Americans own one or more guns (35% of men and 12% of women). 
America's pervasive gun culture stems in part from its colonial history, revolutionary roots, frontier expansion, and the Second Amendment, which states: ""A well regulated militia, - being necessary to the security of a free State, - the right of the people to keep and bear Arms, - shall not be infringed."" - -Proponents of more gun control laws state that the Second Amendment was intended for militias; that gun violence would be reduced; that gun restrictions have always existed; and that a majority of Americans, including gun owners, support new gun restrictions. " }); - var puppiesResult = model.Predict(new NewsData() - { - Subject = "Studies Reveal Five Ways Dogs Show Us Their Love", - Content = @"Let's face it: We all adore our dogs as if they were family and we tend to shower our dogs with affection in numerous ways. Perhaps you may buy your dog a favorite toy or stop by the dog bakery to order some great tasting doggy cookies, or perhaps you just love patting your dog in the evening in the way he most loves. But how do our dogs tell us they love us too? - -Until the day your dog can talk, you'll never likely hear him pronounce ""I love you,"" and in the meantime, don't expect him to purchase you a Hallmark card or some balloons with those renowned romantic words printed on top. Also, don’t expect a box of chocolates or a bouquet of flowers from your dog when Valentine's day is around the corner. Sometimes it might feel like we're living an uneven relationship, but just because dogs don't communicate their love the way we do, doesn't mean they don't love us!" - }); - } - - public class NewsData - { - [LoadColumn(0)] - public string Id; - - [LoadColumn(1) , ColumnName("Label")] - public string Topic; - - [LoadColumn(2)] - public string Subject; - - [LoadColumn(3)] - public string Content; - } - public class ClusteringPrediction { [ColumnName("PredictedLabel")] @@ -100,22 +53,35 @@ public void PredictClusters() } }; } - var pipeline = new Legacy.LearningPipeline(seed: 1, conc: 1); - pipeline.Add(Legacy.Data.CollectionDataSource.Create(data)); - pipeline.Add(new Legacy.Trainers.KMeansPlusPlusClusterer() { K = k }); - var model = pipeline.Train(); - //validate that initial points we pick up as centers of cluster during data generation belong to different clusters. + + var mlContext = new MLContext(seed: 1, conc: 1); + + // Turn the data into the ML.NET data view. + // We can use CreateDataView or CreateStreamingDataView, depending on whether 'churnData' is an IList, + // or merely an IEnumerable. + var trainData = mlContext.CreateStreamingDataView(data); + var testData = mlContext.CreateStreamingDataView(clusters); + + // Create Estimator + var pipe = mlContext.Clustering.Trainers.KMeans("Features", clustersCount: k); + + // Train the pipeline + var trainedModel = pipe.Fit(trainData); + + // Validate that initial points we pick up as centers of cluster during data generation belong to different clusters. 
var labels = new HashSet(); + var predictFunction = trainedModel.CreatePredictionEngine(mlContext); + for (int i = 0; i < k; i++) { - var scores = model.Predict(clusters[i]); + var scores = predictFunction.Predict(clusters[i]); Assert.True(!labels.Contains(scores.SelectedClusterId)); labels.Add(scores.SelectedClusterId); } - var evaluator = new Legacy.Models.ClusterEvaluator(); - var testData = Legacy.Data.CollectionDataSource.Create(clusters); - var metrics = evaluator.Evaluate(model, testData); + // Evaluate the trained pipeline + var predicted = trainedModel.Transform(testData); + var metrics = mlContext.Clustering.Evaluate(predicted); //Label is not specified, so NMI would be equal to NaN Assert.Equal(metrics.Nmi, double.NaN); @@ -124,5 +90,4 @@ public void PredictClusters() Assert.Equal(metrics.AvgMinScore, (double)0.0, 5); } } -#pragma warning restore 612, 618 } diff --git a/test/Microsoft.ML.Tests/Scenarios/GetColumnTests.cs b/test/Microsoft.ML.Tests/Scenarios/GetColumnTests.cs index e0805bd63c..4484e07504 100644 --- a/test/Microsoft.ML.Tests/Scenarios/GetColumnTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/GetColumnTests.cs @@ -26,7 +26,7 @@ public void TestGetColumn() var path = GetDataPath(TestDatasets.breastCancer.trainFilename); var env = new MLContext(); - var data = TextLoader.CreateReader(env, ctx => ( + var data = TextLoaderStatic.CreateReader(env, ctx => ( floatScalar: ctx.LoadFloat(1), floatVector: ctx.LoadFloat(2, 6), stringScalar: ctx.LoadText(4), diff --git a/test/Microsoft.ML.Tests/Scenarios/HousePricePredictionTests.cs b/test/Microsoft.ML.Tests/Scenarios/HousePricePredictionTests.cs deleted file mode 100644 index ef8f704f4d..0000000000 --- a/test/Microsoft.ML.Tests/Scenarios/HousePricePredictionTests.cs +++ /dev/null @@ -1,131 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using Microsoft.ML.Data; -using Microsoft.ML.TestFramework; -using Xunit; -using Xunit.Abstractions; - -namespace Microsoft.ML.Scenarios -{ - public partial class ScenariosTests : BaseTestClass - { - /* - A real-estate firm Contoso wants to add a house price prediction to their ASP.NET/Xamarin application. - The application will let users submit information about their house, and see a price they could expect if they put the house for sale. - Because real estate transaction data is public, Contoso has historical data they intend to use to train Machine Learning prediction engine. - */ -#pragma warning disable 612 - [Fact(Skip = "Missing data set. 
See https://github.com/dotnet/machinelearning/issues/3")] - public async void PredictHousePriceModelTest() - { - string modelFilePath = GetOutputPath("PredictHousePriceModelTest.zip"); - ModelHelper.WriteKcHousePriceModel(GetDataPath("kc_house_data.csv"), modelFilePath); - - var model = await Legacy.PredictionModel.ReadAsync(modelFilePath); - - HousePricePrediction prediction = model.Predict(new HousePriceData() - { - Bedrooms = 3, - Bathrooms = 2, - SqftLiving = 1710, - SqftLot = 4697, - Floors = 1.5f, - Waterfront = 0, - View = 0, - Condition = 5, - Grade = 6, - SqftAbove = 1710, - SqftBasement = 0, - YearBuilt = 1941, - YearRenovated = 0, - Zipcode = 98002, - Lat = 47.3048f, - Long = -122.218f, - SqftLiving15 = 1030, - SqftLot15 = 4705 - }); - - Assert.InRange(prediction.Price, 260_000, 330_000); - } -#pragma warning restore 612 - - public class HousePriceData - { - [LoadColumn(0)] - public string Id; - - [LoadColumn(1)] - public string Date; - - [LoadColumn(2), ColumnName("Label")] - public float Price; - - [LoadColumn(3)] - public float Bedrooms; - - [LoadColumn(4)] - public float Bathrooms; - - [LoadColumn(5)] - public float SqftLiving; - - [LoadColumn(6)] - public float SqftLot; - - [LoadColumn(7)] - public float Floors; - - [LoadColumn(8)] - public float Waterfront; - - [LoadColumn(9)] - public float View; - - [LoadColumn(10)] - public float Condition; - - [LoadColumn(11)] - public float Grade; - - [LoadColumn(12)] - public float SqftAbove; - - [LoadColumn(13)] - public float SqftBasement; - - [LoadColumn(14)] - public float YearBuilt; - - [LoadColumn(15)] - public float YearRenovated; - - [LoadColumn(16)] - public float Zipcode; - - [LoadColumn(17)] - public float Lat; - - [LoadColumn(18)] - public float Long; - - [LoadColumn(19)] - public float SqftLiving15; - - [LoadColumn(20)] - public float SqftLot15; - } - - public class HousePricePrediction - { - [ColumnName("Score")] - public float Price; - } - - public ScenariosTests(ITestOutputHelper output) : base(output) - { - } - } -} - diff --git a/test/Microsoft.ML.Tests/Scenarios/HousePriceTrainAndPredictionTests.cs b/test/Microsoft.ML.Tests/Scenarios/HousePriceTrainAndPredictionTests.cs deleted file mode 100644 index 3c5a008dff..0000000000 --- a/test/Microsoft.ML.Tests/Scenarios/HousePriceTrainAndPredictionTests.cs +++ /dev/null @@ -1,75 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using Microsoft.ML.Legacy.Data; -using Microsoft.ML.Legacy.Models; -using Microsoft.ML.Legacy.Trainers; -using Microsoft.ML.Legacy.Transforms; -using Xunit; - -namespace Microsoft.ML.Scenarios -{ -#pragma warning disable 612, 618 - public partial class ScenariosTests - { - [Fact(Skip = "Missing data set. 
See https://github.com/dotnet/machinelearning/issues/3")] - public void TrainAndPredictHousePriceModelTest() - { - string dataPath = GetDataPath("kc_house_data.csv"); - - var pipeline = new Legacy.LearningPipeline(); - - pipeline.Add(new TextLoader(dataPath).CreateFrom(useHeader: true, separator: ',')); - - pipeline.Add(new ColumnConcatenator(outputColumn: "NumericalFeatures", - "SqftLiving", "SqftLot", "SqftAbove", "SqftBasement", "Lat", "Long", "SqftLiving15", "SqftLot15")); - - pipeline.Add(new ColumnConcatenator(outputColumn: "CategoryFeatures", - "Bedrooms", "Bathrooms", "Floors", "Waterfront", "View", "Condition", "Grade", "YearBuilt", "YearRenovated", "Zipcode")); - - pipeline.Add(new CategoricalOneHotVectorizer("CategoryFeatures")); - pipeline.Add(new ColumnConcatenator(outputColumn: "Features", - "NumericalFeatures", "CategoryFeatures")); - pipeline.Add(new StochasticDualCoordinateAscentRegressor()); - - var model = pipeline.Train(); - - HousePricePrediction prediction = model.Predict(new HousePriceData() - { - Bedrooms = 3, - Bathrooms = 2, - SqftLiving = 1710, - SqftLot = 4697, - Floors = 1.5f, - Waterfront = 0, - View = 0, - Condition = 5, - Grade = 6, - SqftAbove = 1710, - SqftBasement = 0, - YearBuilt = 1941, - YearRenovated = 0, - Zipcode = 98002, - Lat = 47.3048f, - Long = -122.218f, - SqftLiving15 = 1030, - SqftLot15 = 4705 - }); - - Assert.InRange(prediction.Price, 260_000, 330_000); - - string testDataPath = GetDataPath("kc_house_test.csv"); - var testData = new TextLoader(testDataPath).CreateFrom(useHeader: true, separator: ','); - - var evaluator = new RegressionEvaluator(); - RegressionMetrics metrics = evaluator.Evaluate(model, testData); - Assert.InRange(metrics.L1, 85_000, 89_000); - Assert.InRange(metrics.L2, 17_000_000_000, 19_000_000_000); - Assert.InRange(metrics.Rms, 130_500, 135_000); - Assert.InRange(metrics.LossFn, 17_000_000_000, 19_000_000_000); - Assert.Equal(.8, metrics.RSquared, 1); - } - } -#pragma warning restore 612, 618 -} diff --git a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs index ad634680cc..f3906ca806 100644 --- a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs @@ -3,33 +3,47 @@ // See the LICENSE file in the project root for more information. 
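For reference, a minimal sketch of how the removed legacy house-price scenario maps onto the estimator API used throughout the rewritten tests. The column names come from the deleted test; trainData/testData (read from the kc_house_data files) and the regression trainer call are assumptions modeled on the multiclass form used elsewhere in these tests, not code taken from this change.

    var mlContext = new MLContext(seed: 1);
    var pipeline = mlContext.Transforms.Concatenate("NumericalFeatures",
            "SqftLiving", "SqftLot", "SqftAbove", "SqftBasement", "Lat", "Long", "SqftLiving15", "SqftLot15")
        .Append(mlContext.Transforms.Concatenate("CategoryFeatures",
            "Bedrooms", "Bathrooms", "Floors", "Waterfront", "View", "Condition", "Grade", "YearBuilt", "YearRenovated", "Zipcode"))
        .Append(mlContext.Transforms.Categorical.OneHotEncoding("CategoryFeatures"))
        .Append(mlContext.Transforms.Concatenate("Features", "NumericalFeatures", "CategoryFeatures"))
        .Append(mlContext.Regression.Trainers.StochasticDualCoordinateAscent("Label", "Features"));
    var trainedModel = pipeline.Fit(trainData);
    // Regression metrics (L1, L2, Rms, RSquared) replace the legacy RegressionEvaluator output.
    var metrics = mlContext.Regression.Evaluate(trainedModel.Transform(testData));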
using Microsoft.ML.Data; -using Microsoft.ML.Legacy.Models; -using Microsoft.ML.Legacy.Trainers; -using Microsoft.ML.Legacy.Transforms; +using Microsoft.ML.RunTests; +using Microsoft.ML.TestFramework; using Xunit; -using TextLoader = Microsoft.ML.Legacy.Data.TextLoader; +using Xunit.Abstractions; namespace Microsoft.ML.Scenarios { -#pragma warning disable 612, 618 - public partial class ScenariosTests + public partial class ScenariosTests : BaseTestClass { [Fact] public void TrainAndPredictIrisModelTest() { - string dataPath = GetDataPath("iris.txt"); - - var pipeline = new Legacy.LearningPipeline(seed: 1, conc: 1); - - pipeline.Add(new TextLoader(dataPath).CreateFrom(useHeader: false)); - pipeline.Add(new ColumnConcatenator(outputColumn: "Features", - "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")); - - pipeline.Add(new StochasticDualCoordinateAscentClassifier()); - - Legacy.PredictionModel model = pipeline.Train(); - - IrisPrediction prediction = model.Predict(new IrisData() + var mlContext = new MLContext(seed: 1, conc: 1); + + var reader = mlContext.Data.CreateTextReader(columns: new[] + { + new TextLoader.Column("Label", DataKind.R4, 0), + new TextLoader.Column("SepalLength", DataKind.R4, 1), + new TextLoader.Column("SepalWidth", DataKind.R4, 2), + new TextLoader.Column("PetalLength", DataKind.R4, 3), + new TextLoader.Column("PetalWidth", DataKind.R4, 4) + } + ); + + var pipe = mlContext.Transforms.Concatenate("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth") + .Append(mlContext.Transforms.Normalize("Features")) + .AppendCacheCheckpoint(mlContext) + .Append(mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent("Label", "Features", advancedSettings: s => s.NumThreads = 1)); + + // Read training and test data sets + string dataPath = GetDataPath(TestDatasets.iris.trainFilename); + string testDataPath = dataPath; + var trainData = reader.Read(dataPath); + var testData = reader.Read(testDataPath); + + // Train the pipeline + var trainedModel = pipe.Fit(trainData); + + // Make predictions + var predictFunction = trainedModel.CreatePredictionEngine(mlContext); + IrisPrediction prediction = predictFunction.Predict(new IrisData() { SepalLength = 5.1f, SepalWidth = 3.3f, @@ -41,7 +55,7 @@ public void TrainAndPredictIrisModelTest() Assert.Equal(0, prediction.PredictedLabels[1], 2); Assert.Equal(0, prediction.PredictedLabels[2], 2); - prediction = model.Predict(new IrisData() + prediction = predictFunction.Predict(new IrisData() { SepalLength = 6.4f, SepalWidth = 3.1f, @@ -53,7 +67,7 @@ public void TrainAndPredictIrisModelTest() Assert.Equal(0, prediction.PredictedLabels[1], 2); Assert.Equal(1, prediction.PredictedLabels[2], 2); - prediction = model.Predict(new IrisData() + prediction = predictFunction.Predict(new IrisData() { SepalLength = 4.4f, SepalWidth = 3.1f, @@ -65,53 +79,19 @@ public void TrainAndPredictIrisModelTest() Assert.Equal(.8, prediction.PredictedLabels[1], 1); Assert.Equal(0, prediction.PredictedLabels[2], 2); - // Note: Testing against the same data set as a simple way to test evaluation. - // This isn't appropriate in real-world scenarios. 
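The prediction engine created above is generic over the input and output row types; spelled out with explicit type arguments it looks roughly like the sketch below, assuming the IrisData and IrisPrediction classes declared later in this file.

    var engine = trainedModel.CreatePredictionEngine<IrisData, IrisPrediction>(mlContext);
    IrisPrediction prediction = engine.Predict(new IrisData
    {
        SepalLength = 5.1f,
        SepalWidth = 3.3f,
        PetalLength = 1.6f,
        PetalWidth = 0.2f,
    });
    // prediction.PredictedLabels holds the per-class scores produced by the trainer.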
- string testDataPath = GetDataPath("iris.txt"); - var testData = new TextLoader(testDataPath).CreateFrom(useHeader: false); - - var evaluator = new ClassificationEvaluator(); - evaluator.OutputTopKAcc = 3; - ClassificationMetrics metrics = evaluator.Evaluate(model, testData); + // Evaluate the trained pipeline + var predicted = trainedModel.Transform(testData); + var metrics = mlContext.MulticlassClassification.Evaluate(predicted, topK: 3); Assert.Equal(.98, metrics.AccuracyMacro); Assert.Equal(.98, metrics.AccuracyMicro, 2); Assert.Equal(.06, metrics.LogLoss, 2); - Assert.InRange(metrics.LogLossReduction, 94, 96); Assert.Equal(1, metrics.TopKAccuracy); Assert.Equal(3, metrics.PerClassLogLoss.Length); Assert.Equal(0, metrics.PerClassLogLoss[0], 1); Assert.Equal(.1, metrics.PerClassLogLoss[1], 1); Assert.Equal(.1, metrics.PerClassLogLoss[2], 1); - - ConfusionMatrix matrix = metrics.ConfusionMatrix; - Assert.Equal(3, matrix.Order); - Assert.Equal(3, matrix.ClassNames.Count); - Assert.Equal("0", matrix.ClassNames[0]); - Assert.Equal("1", matrix.ClassNames[1]); - Assert.Equal("2", matrix.ClassNames[2]); - - Assert.Equal(50, matrix[0, 0]); - Assert.Equal(50, matrix["0", "0"]); - Assert.Equal(0, matrix[0, 1]); - Assert.Equal(0, matrix["0", "1"]); - Assert.Equal(0, matrix[0, 2]); - Assert.Equal(0, matrix["0", "2"]); - - Assert.Equal(0, matrix[1, 0]); - Assert.Equal(0, matrix["1", "0"]); - Assert.Equal(48, matrix[1, 1]); - Assert.Equal(48, matrix["1", "1"]); - Assert.Equal(2, matrix[1, 2]); - Assert.Equal(2, matrix["1", "2"]); - - Assert.Equal(0, matrix[2, 0]); - Assert.Equal(0, matrix["2", "0"]); - Assert.Equal(1, matrix[2, 1]); - Assert.Equal(1, matrix["2", "1"]); - Assert.Equal(49, matrix[2, 2]); - Assert.Equal(49, matrix["2", "2"]); } public class IrisData @@ -137,7 +117,10 @@ public class IrisPrediction [ColumnName("Score")] public float[] PredictedLabels; } + + public ScenariosTests(ITestOutputHelper output) : base(output) + { + } } -#pragma warning restore 612, 618 } diff --git a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs index 6ad0059032..ff38fbebe5 100644 --- a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs @@ -3,44 +3,48 @@ // See the LICENSE file in the project root for more information. 
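The multiclass evaluation above leans on the default column names; written out in full, the call is roughly the sketch below. The label/score/predictedLabel parameter names are assumptions based on the defaults these tests rely on; the metric properties are the ones asserted above.

    var metrics = mlContext.MulticlassClassification.Evaluate(predicted,
        label: "Label", score: "Score", predictedLabel: "PredictedLabel", topK: 3);
    // metrics.AccuracyMicro, metrics.AccuracyMacro, metrics.LogLoss, metrics.TopKAccuracy,
    // and metrics.PerClassLogLoss are the values the test checks.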
using Microsoft.ML.Data; -using Microsoft.ML.Legacy.Models; -using Microsoft.ML.Legacy.Trainers; -using Microsoft.ML.Legacy.Transforms; using Xunit; -using TextLoader = Microsoft.ML.Legacy.Data.TextLoader; namespace Microsoft.ML.Scenarios { -#pragma warning disable 612, 618 public partial class ScenariosTests { [Fact] public void TrainAndPredictIrisModelWithStringLabelTest() { + var mlContext = new MLContext(seed: 1, conc: 1); + + var reader = mlContext.Data.CreateTextReader(columns: new[] + { + new TextLoader.Column("SepalLength", DataKind.R4, 0), + new TextLoader.Column("SepalWidth", DataKind.R4, 1), + new TextLoader.Column("PetalLength", DataKind.R4, 2), + new TextLoader.Column("PetalWidth", DataKind.R4, 3), + new TextLoader.Column("IrisPlantType", DataKind.TX, 4), + }, + separatorChar: ',' + ); + + // Read training and test data sets string dataPath = GetDataPath("iris.data"); - - var pipeline = new Legacy.LearningPipeline(); - - pipeline.Add(new TextLoader(dataPath).CreateFrom(useHeader: false, separator: ',')); - - pipeline.Add(new Dictionarizer("Label")); // "IrisPlantType" is used as "Label" because of column attribute name on the field. - - pipeline.Add(new ColumnConcatenator(outputColumn: "Features", - "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")); - - pipeline.Add(new StochasticDualCoordinateAscentClassifier()); - - var model = pipeline.Train(); - string[] scoreLabels; - model.TryGetScoreLabelNames(out scoreLabels); - - Assert.NotNull(scoreLabels); - Assert.Equal(3, scoreLabels.Length); - Assert.Equal("Iris-setosa", scoreLabels[0]); - Assert.Equal("Iris-versicolor", scoreLabels[1]); - Assert.Equal("Iris-virginica", scoreLabels[2]); - - IrisPrediction prediction = model.Predict(new IrisDataWithStringLabel() + string testDataPath = dataPath; + var trainData = reader.Read(dataPath); + var testData = reader.Read(testDataPath); + + // Create Estimator + var pipe = mlContext.Transforms.Concatenate("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth") + .Append(mlContext.Transforms.Normalize("Features")) + .Append(mlContext.Transforms.Conversion.MapValueToKey("IrisPlantType", "Label"), TransformerScope.TrainTest) + .AppendCacheCheckpoint(mlContext) + .Append(mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent("Label", "Features", advancedSettings: s => s.NumThreads = 1)) + .Append(mlContext.Transforms.Conversion.MapKeyToValue(("PredictedLabel", "Plant"))); + + // Train the pipeline + var trainedModel = pipe.Fit(trainData); + + // Make predictions + var predictFunction = trainedModel.CreatePredictionEngine(mlContext); + IrisPredictionWithStringLabel prediction = predictFunction.Predict(new IrisDataWithStringLabel() { SepalLength = 5.1f, SepalWidth = 3.3f, @@ -48,11 +52,12 @@ public void TrainAndPredictIrisModelWithStringLabelTest() PetalWidth = 0.2f, }); - Assert.Equal(1, prediction.PredictedLabels[0], 2); - Assert.Equal(0, prediction.PredictedLabels[1], 2); - Assert.Equal(0, prediction.PredictedLabels[2], 2); + Assert.Equal(1, prediction.PredictedScores[0], 2); + Assert.Equal(0, prediction.PredictedScores[1], 2); + Assert.Equal(0, prediction.PredictedScores[2], 2); + Assert.True(prediction.PredictedPlant == "Iris-setosa"); - prediction = model.Predict(new IrisDataWithStringLabel() + prediction = predictFunction.Predict(new IrisDataWithStringLabel() { SepalLength = 6.4f, SepalWidth = 3.1f, @@ -60,11 +65,12 @@ public void TrainAndPredictIrisModelWithStringLabelTest() PetalWidth = 2.2f, }); - Assert.Equal(0, 
prediction.PredictedLabels[0], 2); - Assert.Equal(0, prediction.PredictedLabels[1], 2); - Assert.Equal(1, prediction.PredictedLabels[2], 2); + Assert.Equal(0, prediction.PredictedScores[0], 2); + Assert.Equal(0, prediction.PredictedScores[1], 2); + Assert.Equal(1, prediction.PredictedScores[2], 2); + Assert.True(prediction.PredictedPlant == "Iris-virginica"); - prediction = model.Predict(new IrisDataWithStringLabel() + prediction = predictFunction.Predict(new IrisDataWithStringLabel() { SepalLength = 4.4f, SepalWidth = 3.1f, @@ -72,18 +78,14 @@ public void TrainAndPredictIrisModelWithStringLabelTest() PetalWidth = 1.2f, }); - Assert.Equal(.2, prediction.PredictedLabels[0], 1); - Assert.Equal(.8, prediction.PredictedLabels[1], 1); - Assert.Equal(0, prediction.PredictedLabels[2], 2); - - // Note: Testing against the same data set as a simple way to test evaluation. - // This isn't appropriate in real-world scenarios. - string testDataPath = GetDataPath("iris.data"); - var testData = new TextLoader(testDataPath).CreateFrom(useHeader: false, separator: ','); + Assert.Equal(.2, prediction.PredictedScores[0], 1); + Assert.Equal(.8, prediction.PredictedScores[1], 1); + Assert.Equal(0, prediction.PredictedScores[2], 2); + Assert.True(prediction.PredictedPlant == "Iris-versicolor"); - var evaluator = new ClassificationEvaluator(); - evaluator.OutputTopKAcc = 3; - ClassificationMetrics metrics = evaluator.Evaluate(model, testData); + // Evaluate the trained pipeline + var predicted = trainedModel.Transform(testData); + var metrics = mlContext.MulticlassClassification.Evaluate(predicted, topK: 3); Assert.Equal(.98, metrics.AccuracyMacro); Assert.Equal(.98, metrics.AccuracyMicro, 2); @@ -95,37 +97,9 @@ public void TrainAndPredictIrisModelWithStringLabelTest() Assert.Equal(0, metrics.PerClassLogLoss[0], 1); Assert.Equal(.1, metrics.PerClassLogLoss[1], 1); Assert.Equal(.1, metrics.PerClassLogLoss[2], 1); - - ConfusionMatrix matrix = metrics.ConfusionMatrix; - Assert.Equal(3, matrix.Order); - Assert.Equal(3, matrix.ClassNames.Count); - Assert.Equal("Iris-setosa", matrix.ClassNames[0]); - Assert.Equal("Iris-versicolor", matrix.ClassNames[1]); - Assert.Equal("Iris-virginica", matrix.ClassNames[2]); - - Assert.Equal(50, matrix[0, 0]); - Assert.Equal(50, matrix["Iris-setosa", "Iris-setosa"]); - Assert.Equal(0, matrix[0, 1]); - Assert.Equal(0, matrix["Iris-setosa", "Iris-versicolor"]); - Assert.Equal(0, matrix[0, 2]); - Assert.Equal(0, matrix["Iris-setosa", "Iris-virginica"]); - - Assert.Equal(0, matrix[1, 0]); - Assert.Equal(0, matrix["Iris-versicolor", "Iris-setosa"]); - Assert.Equal(48, matrix[1, 1]); - Assert.Equal(48, matrix["Iris-versicolor", "Iris-versicolor"]); - Assert.Equal(2, matrix[1, 2]); - Assert.Equal(2, matrix["Iris-versicolor", "Iris-virginica"]); - - Assert.Equal(0, matrix[2, 0]); - Assert.Equal(0, matrix["Iris-virginica", "Iris-setosa"]); - Assert.Equal(1, matrix[2, 1]); - Assert.Equal(1, matrix["Iris-virginica", "Iris-versicolor"]); - Assert.Equal(49, matrix[2, 2]); - Assert.Equal(49, matrix["Iris-virginica", "Iris-virginica"]); } - public class IrisDataWithStringLabel + private class IrisDataWithStringLabel { [LoadColumn(0)] public float SepalLength; @@ -139,9 +113,17 @@ public class IrisDataWithStringLabel [LoadColumn(3)] public float PetalWidth; - [LoadColumn(4), ColumnName("Label")] - public string IrisPlantType; + [LoadColumn(4)] + public string IrisPlantType { get; set; } + } + + private class IrisPredictionWithStringLabel + { + [ColumnName("Score")] + public float[] PredictedScores 
{ get; set; } + + [ColumnName("Plant")] + public string PredictedPlant { get; set; } } } -#pragma warning restore 612, 618 } diff --git a/test/Microsoft.ML.Tests/Scenarios/PipelineApi/Evaluation.cs b/test/Microsoft.ML.Tests/Scenarios/PipelineApi/Evaluation.cs deleted file mode 100644 index dca0fb2c79..0000000000 --- a/test/Microsoft.ML.Tests/Scenarios/PipelineApi/Evaluation.cs +++ /dev/null @@ -1,43 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using Microsoft.ML.Legacy.Data; -using Microsoft.ML.Legacy.Models; -using Microsoft.ML.Legacy.Trainers; -using Microsoft.ML.Legacy.Transforms; -using Xunit; - -namespace Microsoft.ML.Tests.Scenarios.PipelineApi -{ -#pragma warning disable 612, 618 - public partial class PipelineApiScenarioTests - { - /// - /// Evaluation: Similar to the simple train scenario, except instead of having some - /// predictive structure, be able to score another "test" data file, run the result - /// through an evaluator and get metrics like AUC, accuracy, PR curves, and whatnot. - /// Getting metrics out of this shoudl be as straightforward and unannoying as possible. - /// - [Fact] - public void Evaluation() - { - var dataPath = GetDataPath(SentimentDataPath); - var testDataPath = GetDataPath(SentimentDataPath); - var pipeline = new Legacy.LearningPipeline(); - - var loader = new TextLoader(dataPath).CreateFrom(); - loader.Arguments.HasHeader = true; - pipeline.Add(loader); - pipeline.Add(MakeSentimentTextTransform()); - pipeline.Add(new FastTreeBinaryClassifier() { NumLeaves = 5, NumTrees = 5, MinDocumentsInLeafs = 2 }); - pipeline.Add(new PredictedLabelColumnOriginalValueConverter() { PredictedLabelColumn = "PredictedLabel" }); - var model = pipeline.Train(); - var testLearningPipelineItem = new TextLoader(testDataPath).CreateFrom(); - testLearningPipelineItem.Arguments.HasHeader = true; - var evaluator = new BinaryClassificationEvaluator(); - var metrics = evaluator.Evaluate(model, testLearningPipelineItem); - } - } -#pragma warning restore 612, 618 -} diff --git a/test/Microsoft.ML.Tests/Scenarios/PipelineApi/Metacomponents.cs b/test/Microsoft.ML.Tests/Scenarios/PipelineApi/Metacomponents.cs deleted file mode 100644 index 56b6b3ea09..0000000000 --- a/test/Microsoft.ML.Tests/Scenarios/PipelineApi/Metacomponents.cs +++ /dev/null @@ -1,44 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using Microsoft.ML.Legacy.Data; -using Microsoft.ML.Legacy.Models; -using Microsoft.ML.Legacy.Trainers; -using Microsoft.ML.Legacy.Transforms; -using Xunit; - -namespace Microsoft.ML.Tests.Scenarios.PipelineApi -{ -#pragma warning disable 612, 618 - public partial class PipelineApiScenarioTests - { - /// - /// Meta-components: Meta-components (for example, components that themselves instantiate components) should not be booby-trapped. - /// When specifying what trainer OVA should use, a user will be able to specify any binary classifier. - /// If they specify a regression or multi-class classifier ideally that should be a compile error. 
- /// - [Fact] - void Metacomponents() - { - var dataPath = GetDataPath(IrisDataPath); - var pipeline = new Legacy.LearningPipeline(seed: 1, conc: 1); - pipeline.Add(new TextLoader(dataPath).CreateFrom(useHeader: false)); - pipeline.Add(new Dictionarizer(new[] { "Label" })); - pipeline.Add(new ColumnConcatenator(outputColumn: "Features", - "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")); - - // This will throw exception during training time if you specify any other than binary classifier. - pipeline.Add(OneVersusAll.With(new StochasticDualCoordinateAscentBinaryClassifier())); - - var model = pipeline.Train(); - - var testData = new TextLoader(dataPath).CreateFrom(useHeader: false); - var evaluator = new ClassificationEvaluator(); - ClassificationMetrics metrics = evaluator.Evaluate(model, testData); - - var prediction = model.Predict(new IrisData { PetalLength = 1, PetalWidth = 2, SepalLength = 1.4f, SepalWidth = 1.6f }); - } - } -#pragma warning restore 612, 618 -} diff --git a/test/Microsoft.ML.Tests/Scenarios/PipelineApi/MultithreadedPrediction.cs b/test/Microsoft.ML.Tests/Scenarios/PipelineApi/MultithreadedPrediction.cs deleted file mode 100644 index a05fb9c433..0000000000 --- a/test/Microsoft.ML.Tests/Scenarios/PipelineApi/MultithreadedPrediction.cs +++ /dev/null @@ -1,60 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System.Collections.Generic; -using System.Threading.Tasks; -using Microsoft.ML.Legacy.Data; -using Microsoft.ML.Legacy.Trainers; -using Microsoft.ML.Legacy.Transforms; -using Xunit; - -namespace Microsoft.ML.Tests.Scenarios.PipelineApi -{ - public partial class PipelineApiScenarioTests - { - /// - /// Multi-threaded prediction. A twist on "Simple train and predict", where we account that - /// multiple threads may want predictions at the same time. Because we deliberately do not - /// reallocate internal memory buffers on every single prediction, the PredictionEngine - /// (or its estimator/transformer based successor) is, like most stateful .NET objects, - /// fundamentally not thread safe. This is deliberate and as designed. However, some mechanism - /// to enable multi-threaded scenarios (for example, a web server servicing requests) should be possible - /// and performant in the new API. - /// -#pragma warning disable 612, 618 - [Fact] - void MultithreadedPrediction() - { - var dataPath = GetDataPath(SentimentDataPath); - var testDataPath = GetDataPath(SentimentDataPath); - var pipeline = new Legacy.LearningPipeline(); - - var loader = new TextLoader(dataPath).CreateFrom(); - loader.Arguments.HasHeader = true; - pipeline.Add(loader); - - pipeline.Add(MakeSentimentTextTransform()); - - pipeline.Add(new FastTreeBinaryClassifier() { NumLeaves = 5, NumTrees = 5, MinDocumentsInLeafs = 2 }); - - pipeline.Add(new PredictedLabelColumnOriginalValueConverter() { PredictedLabelColumn = "PredictedLabel" }); - var model = pipeline.Train(); - var collection = new List(); - int numExamples = 100; - for (int i = 0; i < numExamples; i++) - collection.Add(new SentimentData() { SentimentText = "Let's predict this one!" }); - - Parallel.ForEach(collection, (input) => - { - // We need this lock because model itself is stateful object, and probably not thread safe. - // See comment on top of test. 
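    // The same caveat applies to the estimator-based engine that replaces this API:
    // a PredictionEngine is stateful and not thread safe, so a shared engine must be
    // locked around each call, or one engine created per thread. A sketch (type
    // arguments and variable names assumed, not taken from this test):
    //     var engine = trainedModel.CreatePredictionEngine<SentimentData, SentimentPrediction>(mlContext);
    //     Parallel.ForEach(collection, input => { lock (engine) { var p = engine.Predict(input); } });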
- lock (model) - { - var prediction = model.Predict(input); - } - }); - } -#pragma warning restore 612, 618 - } -} diff --git a/test/Microsoft.ML.Tests/Scenarios/PipelineApi/PipelineApiScenarioTests.cs b/test/Microsoft.ML.Tests/Scenarios/PipelineApi/PipelineApiScenarioTests.cs deleted file mode 100644 index 3194370879..0000000000 --- a/test/Microsoft.ML.Tests/Scenarios/PipelineApi/PipelineApiScenarioTests.cs +++ /dev/null @@ -1,63 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using Microsoft.ML.Data; -using Microsoft.ML.TestFramework; -using Xunit.Abstractions; - -namespace Microsoft.ML.Tests.Scenarios.PipelineApi -{ - public partial class PipelineApiScenarioTests : BaseTestClass - { - public PipelineApiScenarioTests(ITestOutputHelper output) : base(output) - { - } - - public const string IrisDataPath = "iris.data"; - public const string SentimentDataPath = "wikipedia-detox-250-line-data.tsv"; - public const string SentimentTestPath = "wikipedia-detox-250-line-test.tsv"; - - public class IrisData : IrisDataNoLabel - { - [LoadColumn(0)] - public string Label; - } - - public class IrisDataNoLabel - { - [LoadColumn(1)] - public float SepalLength; - - [LoadColumn(2)] - public float SepalWidth; - - [LoadColumn(3)] - public float PetalLength; - - [LoadColumn(4)] - public float PetalWidth; - } - - public class IrisPrediction - { - public float[] Score; - } - - public class SentimentData - { - [LoadColumn(0), ColumnName("Label")] - public bool Sentiment; - [LoadColumn(1)] - public string SentimentText; - } - - public class SentimentPrediction - { - [ColumnName("PredictedLabel")] - public bool Sentiment; - - public float Score; - } - } -} diff --git a/test/Microsoft.ML.Tests/Scenarios/PipelineApi/SimpleTrainAndPredict.cs b/test/Microsoft.ML.Tests/Scenarios/PipelineApi/SimpleTrainAndPredict.cs deleted file mode 100644 index 2ef096a343..0000000000 --- a/test/Microsoft.ML.Tests/Scenarios/PipelineApi/SimpleTrainAndPredict.cs +++ /dev/null @@ -1,56 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using Microsoft.ML.Legacy.Data; -using Microsoft.ML.Legacy.Trainers; -using Microsoft.ML.Legacy.Transforms; -using Xunit; - -namespace Microsoft.ML.Tests.Scenarios.PipelineApi -{ - public partial class PipelineApiScenarioTests - { -#pragma warning disable 612, 618 - /// - /// Start with a dataset in a text file. Run text featurization on text values. - /// Train a linear model over that. (I am thinking sentiment classification.) - /// Out of the result, produce some structure over which you can get predictions programmatically - /// (for example, the prediction does not happen over a file as it did during training). 
- /// - [Fact] - void SimpleTrainAndPredict() - { - var dataPath = GetDataPath(SentimentDataPath); - var testDataPath = GetDataPath(SentimentDataPath); - var pipeline = new Legacy.LearningPipeline(); - - var loader = new TextLoader(dataPath).CreateFrom(); - loader.Arguments.HasHeader = true; - pipeline.Add(loader); - - pipeline.Add(MakeSentimentTextTransform()); - - pipeline.Add(new FastTreeBinaryClassifier() { NumLeaves = 5, NumTrees = 5, MinDocumentsInLeafs = 2 }); - - pipeline.Add(new PredictedLabelColumnOriginalValueConverter() { PredictedLabelColumn = "PredictedLabel" }); - var model = pipeline.Train(); - var singlePrediction = model.Predict(new SentimentData() { SentimentText = "Not big fan of this." }); - Assert.True(singlePrediction.Sentiment); - } - - private static TextFeaturizer MakeSentimentTextTransform() - { - return new TextFeaturizer("Features", "SentimentText") - { - KeepPunctuations = false, - OutputTokens = true, - UsePredefinedStopWordRemover = true, - VectorNormalizer = TextFeaturizingEstimatorTextNormKind.L2, - CharFeatureExtractor = new NGramNgramExtractor() { NgramLength = 3, AllLengths = false }, - WordFeatureExtractor = new NGramNgramExtractor() { NgramLength = 2, AllLengths = true } - }; - } - } -#pragma warning restore 612, 618 -} diff --git a/test/Microsoft.ML.Tests/Scenarios/PipelineApi/TrainSaveModelAndPredict.cs b/test/Microsoft.ML.Tests/Scenarios/PipelineApi/TrainSaveModelAndPredict.cs deleted file mode 100644 index 57aeddc74d..0000000000 --- a/test/Microsoft.ML.Tests/Scenarios/PipelineApi/TrainSaveModelAndPredict.cs +++ /dev/null @@ -1,45 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using Microsoft.ML.Legacy.Data; -using Microsoft.ML.Legacy.Trainers; -using Microsoft.ML.Legacy.Transforms; -using Xunit; - -namespace Microsoft.ML.Tests.Scenarios.PipelineApi -{ -#pragma warning disable 612, 618 - public partial class PipelineApiScenarioTests - { - /// - /// Train, save/load model, predict: - /// Serve the scenario where training and prediction happen in different processes (or even different machines). - /// The actual test will not run in different processes, but will simulate the idea that the - /// "communication pipe" is just a serialized model of some form. - /// - [Fact] - public async void TrainSaveModelAndPredict() - { - var dataPath = GetDataPath(SentimentDataPath); - var testDataPath = GetDataPath(SentimentDataPath); - var pipeline = new Legacy.LearningPipeline(); - - var loader = new TextLoader(dataPath).CreateFrom(); - loader.Arguments.HasHeader = true; - pipeline.Add(loader); - pipeline.Add(MakeSentimentTextTransform()); - pipeline.Add(new FastTreeBinaryClassifier() { NumLeaves = 5, NumTrees = 5, MinDocumentsInLeafs = 2 }); - pipeline.Add(new PredictedLabelColumnOriginalValueConverter() { PredictedLabelColumn = "PredictedLabel" }); - - var model = pipeline.Train(); - var modelName = "trainSaveAndPredictdModel.zip"; - DeleteOutputPath(modelName); - await model.WriteAsync(modelName); - var loadedModel = await Legacy.PredictionModel.ReadAsync(modelName); - var singlePrediction = loadedModel.Predict(new SentimentData() { SentimentText = "Not big fan of this." 
}); - Assert.True(singlePrediction.Sentiment); - } - } -#pragma warning restore 612, 618 -} diff --git a/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs b/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs deleted file mode 100644 index 731a8ea5e7..0000000000 --- a/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs +++ /dev/null @@ -1,389 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using System.Collections.Generic; -using System.Linq; -using Microsoft.ML.Data; -using Microsoft.ML.Legacy; -using Microsoft.ML.Legacy.Models; -using Microsoft.ML.Legacy.Trainers; -using Microsoft.ML.Legacy.Transforms; -using Xunit; - -namespace Microsoft.ML.Scenarios -{ -#pragma warning disable 612, 618 - public partial class ScenariosTests - { - public const string SentimentDataPath = "wikipedia-detox-250-line-data.tsv"; - public const string SentimentTestPath = "wikipedia-detox-250-line-test.tsv"; - - [Fact] - public void TrainAndPredictSentimentModelTest() - { - var pipeline = PreparePipeline(); - var model = pipeline.Train(); - var testData = PrepareTextLoaderTestData(); - var evaluator = new BinaryClassificationEvaluator(); - var metrics = evaluator.Evaluate(model, testData); - ValidateExamples(model); - ValidateBinaryMetrics(metrics); - } - - [Fact] - public void TrainAndPredictSymSGDSentimentModelTest() - { - var pipeline = PreparePipelineSymSGD(); - var model = pipeline.Train(); - var testData = PrepareTextLoaderTestData(); - var evaluator = new BinaryClassificationEvaluator(); - var metrics = evaluator.Evaluate(model, testData); - ValidateExamplesSymSGD(model); - ValidateBinaryMetricsSymSGD(metrics); - } - - [ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // LightGBM is 64-bit only - public void TrainAndPredictLightGBMSentimentModelTest() - { - var pipeline = PreparePipelineLightGBM(); - var model = pipeline.Train(); - var testData = PrepareTextLoaderTestData(); - var evaluator = new BinaryClassificationEvaluator(); - var metrics = evaluator.Evaluate(model, testData); - ValidateExamplesLightGBM(model); - ValidateBinaryMetricsLightGBM(metrics); - } - - private void ValidateBinaryMetricsSymSGD(Microsoft.ML.Legacy.Models.BinaryClassificationMetrics metrics) - { - - Assert.Equal(.8889, metrics.Accuracy, 4); - Assert.Equal(1, metrics.Auc, 1); - Assert.Equal(0.96, metrics.Auprc, 2); - Assert.Equal(1, metrics.Entropy, 3); - Assert.Equal(.9, metrics.F1Score, 4); - Assert.Equal(.97, metrics.LogLoss, 3); - Assert.Equal(3.030, metrics.LogLossReduction, 3); - Assert.Equal(1, metrics.NegativePrecision, 3); - Assert.Equal(.778, metrics.NegativeRecall, 3); - Assert.Equal(.818, metrics.PositivePrecision, 3); - Assert.Equal(1, metrics.PositiveRecall); - - var matrix = metrics.ConfusionMatrix; - Assert.Equal(2, matrix.Order); - Assert.Equal(2, matrix.ClassNames.Count); - Assert.Equal("positive", matrix.ClassNames[0]); - Assert.Equal("negative", matrix.ClassNames[1]); - - Assert.Equal(9, matrix[0, 0]); - Assert.Equal(9, matrix["positive", "positive"]); - Assert.Equal(0, matrix[0, 1]); - Assert.Equal(0, matrix["positive", "negative"]); - - Assert.Equal(2, matrix[1, 0]); - Assert.Equal(2, matrix["negative", "positive"]); - Assert.Equal(7, matrix[1, 1]); - Assert.Equal(7, matrix["negative", "negative"]); - - } - - private void 
ValidateBinaryMetricsLightGBM(Microsoft.ML.Legacy.Models.BinaryClassificationMetrics metrics) - { - - Assert.Equal(0.61111111111111116, metrics.Accuracy, 4); - Assert.Equal(0.83950617283950613, metrics.Auc, 1); - Assert.Equal(0.88324268324268318, metrics.Auprc, 2); - Assert.Equal(1, metrics.Entropy, 3); - Assert.Equal(.72, metrics.F1Score, 4); - Assert.Equal(0.96456100297125325, metrics.LogLoss, 4); - Assert.Equal(3.5438997028746755, metrics.LogLossReduction, 4); - Assert.Equal(1, metrics.NegativePrecision, 3); - Assert.Equal(0.22222222222222221, metrics.NegativeRecall, 3); - Assert.Equal(0.5625, metrics.PositivePrecision, 3); - Assert.Equal(1, metrics.PositiveRecall); - - var matrix = metrics.ConfusionMatrix; - Assert.Equal(2, matrix.Order); - Assert.Equal(2, matrix.ClassNames.Count); - Assert.Equal("positive", matrix.ClassNames[0]); - Assert.Equal("negative", matrix.ClassNames[1]); - - Assert.Equal(9, matrix[0, 0]); - Assert.Equal(9, matrix["positive", "positive"]); - Assert.Equal(0, matrix[0, 1]); - Assert.Equal(0, matrix["positive", "negative"]); - - Assert.Equal(7, matrix[1, 0]); - Assert.Equal(7, matrix["negative", "positive"]); - Assert.Equal(2, matrix[1, 1]); - Assert.Equal(2, matrix["negative", "negative"]); - - } - - private void ValidateBinaryMetrics(Microsoft.ML.Legacy.Models.BinaryClassificationMetrics metrics) - { - - Assert.Equal(0.6111, metrics.Accuracy, 4); - Assert.Equal(0.6667, metrics.Auc, 4); - Assert.Equal(0.8621, metrics.Auprc, 4); - Assert.Equal(1, metrics.Entropy, 3); - Assert.Equal(0.72, metrics.F1Score, 2); - Assert.Equal(0.9689, metrics.LogLoss, 4); - Assert.Equal(3.1122, metrics.LogLossReduction, 4); - Assert.Equal(1, metrics.NegativePrecision, 1); - Assert.Equal(0.2222, metrics.NegativeRecall, 4); - Assert.Equal(0.5625, metrics.PositivePrecision, 4); - Assert.Equal(1, metrics.PositiveRecall); - - var matrix = metrics.ConfusionMatrix; - Assert.Equal(2, matrix.Order); - Assert.Equal(2, matrix.ClassNames.Count); - Assert.Equal("positive", matrix.ClassNames[0]); - Assert.Equal("negative", matrix.ClassNames[1]); - - Assert.Equal(9, matrix[0, 0]); - Assert.Equal(9, matrix["positive", "positive"]); - Assert.Equal(0, matrix[0, 1]); - Assert.Equal(0, matrix["positive", "negative"]); - - Assert.Equal(7, matrix[1, 0]); - Assert.Equal(7, matrix["negative", "positive"]); - Assert.Equal(2, matrix[1, 1]); - Assert.Equal(2, matrix["negative", "negative"]); - } - - private Legacy.LearningPipeline PreparePipeline() - { - var dataPath = GetDataPath(SentimentDataPath); - var pipeline = new LearningPipeline(); - - pipeline.Add(new Legacy.Data.TextLoader(dataPath) - { - Arguments = new Legacy.Data.TextLoaderArguments - { - Separator = new[] { '\t' }, - HasHeader = true, - Column = new[] - { - new Legacy.Data.TextLoaderColumn() - { - Name = "Label", - Source = new [] { new Legacy.Data.TextLoaderRange(0) }, - Type = Legacy.Data.DataKind.Num - }, - - new Legacy.Data.TextLoaderColumn() - { - Name = "SentimentText", - Source = new [] { new Legacy.Data.TextLoaderRange(1) }, - Type = Legacy.Data.DataKind.Text - } - } - } - }); - - pipeline.Add(new TextFeaturizer("Features", "SentimentText") - { - KeepPunctuations = false, - OutputTokens = true, - UsePredefinedStopWordRemover = true, - VectorNormalizer = TextFeaturizingEstimatorTextNormKind.L2, - CharFeatureExtractor = new NGramNgramExtractor() { NgramLength = 3, AllLengths = false }, - WordFeatureExtractor = new NGramNgramExtractor() { NgramLength = 2, AllLengths = true } - }); - - - pipeline.Add(new FastTreeBinaryClassifier() { NumLeaves 
= 5, NumTrees = 5, MinDocumentsInLeafs = 2 }); - - pipeline.Add(new PredictedLabelColumnOriginalValueConverter() { PredictedLabelColumn = "PredictedLabel" }); - return pipeline; - } - - private LearningPipeline PreparePipelineLightGBM() - { - var dataPath = GetDataPath(SentimentDataPath); - var pipeline = new LearningPipeline(); - - pipeline.Add(new Legacy.Data.TextLoader(dataPath) - { - Arguments = new Legacy.Data.TextLoaderArguments - { - Separator = new[] { '\t' }, - HasHeader = true, - Column = new[] - { - new Legacy.Data.TextLoaderColumn() - { - Name = "Label", - Source = new [] { new Legacy.Data.TextLoaderRange(0) }, - Type = Legacy.Data.DataKind.Num - }, - - new Legacy.Data.TextLoaderColumn() - { - Name = "SentimentText", - Source = new [] { new Legacy.Data.TextLoaderRange(1) }, - Type = Legacy.Data.DataKind.Text - } - } - } - }); - - pipeline.Add(new TextFeaturizer("Features", "SentimentText") - { - KeepPunctuations = false, - OutputTokens = true, - UsePredefinedStopWordRemover = true, - VectorNormalizer = TextFeaturizingEstimatorTextNormKind.L2, - CharFeatureExtractor = new NGramNgramExtractor() { NgramLength = 3, AllLengths = false }, - WordFeatureExtractor = new NGramNgramExtractor() { NgramLength = 2, AllLengths = true } - }); - - - pipeline.Add(new LightGbmBinaryClassifier() { NumLeaves = 5, NumBoostRound = 5, MinDataPerLeaf = 2 }); - - pipeline.Add(new PredictedLabelColumnOriginalValueConverter() { PredictedLabelColumn = "PredictedLabel" }); - return pipeline; - } - - private LearningPipeline PreparePipelineSymSGD() - { - var dataPath = GetDataPath(SentimentDataPath); - var pipeline = new LearningPipeline(); - - pipeline.Add(new Legacy.Data.TextLoader(dataPath) - { - Arguments = new Legacy.Data.TextLoaderArguments - { - Separator = new[] { '\t' }, - HasHeader = true, - Column = new[] - { - new Legacy.Data.TextLoaderColumn() - { - Name = "Label", - Source = new [] { new Legacy.Data.TextLoaderRange(0) }, - Type = Legacy.Data.DataKind.Num - }, - - new Legacy.Data.TextLoaderColumn() - { - Name = "SentimentText", - Source = new [] { new Legacy.Data.TextLoaderRange(1) }, - Type = Legacy.Data.DataKind.Text - } - } - } - }); - - pipeline.Add(new TextFeaturizer("Features", "SentimentText") - { - KeepPunctuations = false, - OutputTokens = true, - UsePredefinedStopWordRemover = true, - VectorNormalizer = TextFeaturizingEstimatorTextNormKind.L2, - CharFeatureExtractor = new NGramNgramExtractor() { NgramLength = 3, AllLengths = false }, - WordFeatureExtractor = new NGramNgramExtractor() { NgramLength = 2, AllLengths = true } - }); - - pipeline.Add(new SymSgdBinaryClassifier() { NumberOfThreads = 1 }); - - pipeline.Add(new PredictedLabelColumnOriginalValueConverter() { PredictedLabelColumn = "PredictedLabel" }); - return pipeline; - } - - private void ValidateExamples(PredictionModel model, bool useLightGBM = false) - { - var sentiments = GetTestData(); - var predictions = model.Predict(sentiments); - Assert.Equal(2, predictions.Count()); - - Assert.True(predictions.ElementAt(0).Sentiment); - Assert.True(predictions.ElementAt(1).Sentiment); - - } - - private void ValidateExamplesLightGBM(PredictionModel model) - { - var sentiments = GetTestData(); - var predictions = model.Predict(sentiments); - Assert.Equal(2, predictions.Count()); - - Assert.True(predictions.ElementAt(0).Sentiment); - Assert.True(predictions.ElementAt(1).Sentiment); - } - - private void ValidateExamplesSymSGD(PredictionModel model) - { - var sentiments = GetTestData(); - var predictions = model.Predict(sentiments); - 
Assert.Equal(2, predictions.Count()); - - Assert.False(predictions.ElementAt(0).Sentiment); - Assert.True(predictions.ElementAt(1).Sentiment); - } - - private Legacy.Data.TextLoader PrepareTextLoaderTestData() - { - var testDataPath = GetDataPath(SentimentTestPath); - var testData = new Legacy.Data.TextLoader(testDataPath) - { - Arguments = new Legacy.Data.TextLoaderArguments - { - Separator = new[] { '\t' }, - HasHeader = true, - Column = new[] - { - new Legacy.Data.TextLoaderColumn() - { - Name = "Label", - Source = new [] { new Legacy.Data.TextLoaderRange(0) }, - Type = Legacy.Data.DataKind.Num - }, - - new Legacy.Data.TextLoaderColumn() - { - Name = "SentimentText", - Source = new [] { new Legacy.Data.TextLoaderRange(1) }, - Type = Legacy.Data.DataKind.Text - } - } - } - }; - return testData; - } - - private IEnumerable GetTestData() - { - return new[] - { - new SentimentData - { - SentimentText = "Please refrain from adding nonsense to Wikipedia." - }, - new SentimentData - { - SentimentText = "He is a CHEATER, and the article should say that." - } - }; - } - - public class SentimentData - { - [LoadColumn(0), ColumnName("Label")] - public float Sentiment; - [LoadColumn(1)] - public string SentimentText; - } - - public class SentimentPrediction - { - [ColumnName("PredictedLabel")] - public bool Sentiment; - } - } -#pragma warning restore 612, 618 -} - diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs index 01fdfeacdc..646eb7b148 100644 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs +++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs @@ -2,16 +2,12 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using System.IO; using Microsoft.ML.Data; -using Microsoft.ML.Legacy.Models; -using Microsoft.ML.Model; using Microsoft.ML.RunTests; using Xunit; namespace Microsoft.ML.Scenarios { -#pragma warning disable 612 public partial class ScenariosTests { [Fact] @@ -46,7 +42,7 @@ public void TrainAndPredictIrisModelUsingDirectInstantiationTest() // Make prediction and then evaluate the trained pipeline var predicted = trainedModel.Transform(testData); var metrics = mlContext.MulticlassClassification.Evaluate(predicted); - CompareMatrics(metrics); + CompareMetrics(metrics); var predictFunction = trainedModel.CreatePredictionEngine(mlContext); ComparePredictions(predictFunction); } @@ -90,7 +86,7 @@ private void ComparePredictions(PredictionEngine model Assert.Equal(0, prediction.PredictedLabels[2], 2); } - private void CompareMatrics(MultiClassClassifierMetrics metrics) + private void CompareMetrics(MultiClassClassifierMetrics metrics) { Assert.Equal(.98, metrics.AccuracyMacro); Assert.Equal(.98, metrics.AccuracyMicro, 2); @@ -102,39 +98,5 @@ private void CompareMatrics(MultiClassClassifierMetrics metrics) Assert.Equal(.1, metrics.PerClassLogLoss[1], 1); Assert.Equal(.1, metrics.PerClassLogLoss[2], 1); } - - private ClassificationMetrics Evaluate(IHostEnvironment env, IDataView scoredData) - { - var dataEval = new RoleMappedData(scoredData, label: "Label", feature: "Features", opt: true); - - // Evaluate. - // It does not work. 
It throws error "Failed to find 'Score' column" when Evaluate is called - //var evaluator = new MultiClassClassifierEvaluator(env, new MultiClassClassifierEvaluator.Arguments() { OutputTopKAcc = 3 }); - - IMamlEvaluator evaluator = new MultiClassMamlEvaluator(env, new MultiClassMamlEvaluator.Arguments() { OutputTopKAcc = 3 }); - var metricsDic = evaluator.Evaluate(dataEval); - - return ClassificationMetrics.FromMetrics(env, metricsDic["OverallMetrics"], metricsDic["ConfusionMatrix"])[0]; - } - - private IDataScorerTransform GetScorer(IHostEnvironment env, IDataView transforms, IPredictor pred, string testDataPath = null) - { - using (var ch = env.Start("Saving model")) - using (var memoryStream = new MemoryStream()) - { - var trainRoles = new RoleMappedData(transforms, label: "Label", feature: "Features"); - - // Model cannot be saved with CacheDataView - TrainUtils.SaveModel(env, ch, memoryStream, pred, trainRoles); - memoryStream.Position = 0; - using (var rep = RepositoryReader.Open(memoryStream, ch)) - { - IDataLoader testPipe = ModelFileUtils.LoadLoader(env, rep, new MultiFileSource(testDataPath), true); - RoleMappedData testRoles = new RoleMappedData(testPipe, label: "Label", feature: "Features"); - return ScoreUtils.GetScorer(pred, testRoles, env, testRoles.Schema); - } - } - } } -#pragma warning restore 612 } diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/SentimentPredictionTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/SentimentPredictionTests.cs deleted file mode 100644 index 55dcbd6c7f..0000000000 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/SentimentPredictionTests.cs +++ /dev/null @@ -1,162 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. 
- -using System.Linq; -using Microsoft.ML.Data; -using Microsoft.ML.Internal.Internallearn; -using Microsoft.ML.Trainers.FastTree; -using Microsoft.ML.Transforms.Text; -using Xunit; - -namespace Microsoft.ML.Scenarios -{ -#pragma warning disable 612 - public partial class ScenariosTests - { - [Fact] - public void TrainAndPredictSentimentModelWithDirectionInstantiationTest() - { - var dataPath = GetDataPath(SentimentDataPath); - var testDataPath = GetDataPath(SentimentTestPath); - - var env = new MLContext(seed: 1, conc: 1); - // Pipeline - var loader = env.Data.ReadFromTextFile(dataPath, - columns: new[] - { - new TextLoader.Column("Label", DataKind.Num, 0), - new TextLoader.Column("SentimentText", DataKind.Text, 1) - }, - hasHeader: true - ); - - var trans = TextFeaturizingEstimator.Create(env, new TextFeaturizingEstimator.Arguments() - { - Column = new TextFeaturizingEstimator.Column - { - Name = "Features", - Source = new[] { "SentimentText" } - }, - OutputTokens = true, - KeepPunctuations = false, - UsePredefinedStopWordRemover = true, - VectorNormalizer = TextFeaturizingEstimator.TextNormKind.L2, - CharFeatureExtractor = new NgramExtractorTransform.NgramExtractorArguments() { NgramLength = 3, AllLengths = false }, - WordFeatureExtractor = new NgramExtractorTransform.NgramExtractorArguments() { NgramLength = 2, AllLengths = true }, - }, - loader); - - // Train - var trainer = new FastTreeBinaryClassificationTrainer(env, DefaultColumnNames.Label, DefaultColumnNames.Features, - numLeaves: 5, numTrees: 5, minDatapointsInLeaves: 2); - - var trainRoles = new RoleMappedData(trans, label: "Label", feature: "Features"); - var pred = trainer.Train(trainRoles); - - // Get scorer and evaluate the predictions from test data - IDataScorerTransform testDataScorer = GetScorer(env, trans, pred, testDataPath); - var metrics = EvaluateBinary(env, testDataScorer); - ValidateBinaryMetrics(metrics); - - // Create prediction engine and test predictions - var model = env.CreateBatchPredictionEngine(testDataScorer); - var sentiments = GetTestData(); - var predictions = model.Predict(sentiments, false); - Assert.Equal(2, predictions.Count()); - Assert.True(predictions.ElementAt(0).Sentiment); - Assert.True(predictions.ElementAt(1).Sentiment); - - // Get feature importance based on feature gain during training - var summary = ((ICanGetSummaryInKeyValuePairs)pred).GetSummaryInKeyValuePairs(trainRoles.Schema); - Assert.Equal(1.0, (double)summary[0].Value, 1); - } - - [Fact] - public void TrainAndPredictSentimentModelWithDirectionInstantiationTestWithWordEmbedding() - { - var dataPath = GetDataPath(SentimentDataPath); - var testDataPath = GetDataPath(SentimentTestPath); - - var env = new MLContext(seed: 1, conc: 1); - // Pipeline - var loader = env.Data.ReadFromTextFile(dataPath, - columns: new[] - { - new TextLoader.Column("Label", DataKind.Num, 0), - new TextLoader.Column("SentimentText", DataKind.Text, 1) - }, - hasHeader: true - ); - - var text = TextFeaturizingEstimator.Create(env, new TextFeaturizingEstimator.Arguments() - { - Column = new TextFeaturizingEstimator.Column - { - Name = "WordEmbeddings", - Source = new[] { "SentimentText" } - }, - OutputTokens = true, - KeepPunctuations = false, - UsePredefinedStopWordRemover = true, - VectorNormalizer = TextFeaturizingEstimator.TextNormKind.None, - CharFeatureExtractor = null, - WordFeatureExtractor = null, - }, - loader); - - var trans = WordEmbeddingsExtractingTransformer.Create(env, new WordEmbeddingsExtractingTransformer.Arguments() - { - Column = new 
WordEmbeddingsExtractingTransformer.Column[1] - { - new WordEmbeddingsExtractingTransformer.Column - { - Name = "Features", - Source = "WordEmbeddings_TransformedText" - } - }, - ModelKind = WordEmbeddingsExtractingTransformer.PretrainedModelKind.Sswe, - }, text); - // Train - var trainer = new FastTreeBinaryClassificationTrainer(env, DefaultColumnNames.Label, DefaultColumnNames.Features, numLeaves: 5, numTrees: 5, minDatapointsInLeaves: 2); - - var trainRoles = new RoleMappedData(trans, label: "Label", feature: "Features"); - var pred = trainer.Train(trainRoles); - // Get scorer and evaluate the predictions from test data - IDataScorerTransform testDataScorer = GetScorer(env, trans, pred, testDataPath); - var metrics = EvaluateBinary(env, testDataScorer); - - // SSWE is a simple word embedding model + we train on a really small dataset, so metrics are not great. - Assert.Equal(.6667, metrics.Accuracy, 4); - Assert.Equal(.71, metrics.Auc, 1); - Assert.Equal(.58, metrics.Auprc, 2); - // Create prediction engine and test predictions - var model = env.CreateBatchPredictionEngine(testDataScorer); - var sentiments = GetTestData(); - var predictions = model.Predict(sentiments, false); - Assert.Equal(2, predictions.Count()); - Assert.True(predictions.ElementAt(0).Sentiment); - Assert.True(predictions.ElementAt(1).Sentiment); - - // Get feature importance based on feature gain during training - var summary = ((ICanGetSummaryInKeyValuePairs)pred).GetSummaryInKeyValuePairs(trainRoles.Schema); - Assert.Equal(1.0, (double)summary[0].Value, 1); - } - - private Microsoft.ML.Legacy.Models.BinaryClassificationMetrics EvaluateBinary(IHostEnvironment env, IDataView scoredData) - { - var dataEval = new RoleMappedData(scoredData, label: "Label", feature: "Features", opt: true); - - // Evaluate. - // It does not work. 
It throws error "Failed to find 'Score' column" when Evaluate is called - //var evaluator = new BinaryClassifierEvaluator(env, new BinaryClassifierEvaluator.Arguments()); - - IMamlEvaluator evaluator = new BinaryClassifierMamlEvaluator(env, new BinaryClassifierMamlEvaluator.Arguments()); - var metricsDic = evaluator.Evaluate(dataEval); - - return Microsoft.ML.Legacy.Models.BinaryClassificationMetrics - .FromMetrics(env, metricsDic["OverallMetrics"], metricsDic["ConfusionMatrix"])[0]; - } - } -#pragma warning restore 612 -} diff --git a/test/Microsoft.ML.Tests/TensorFlowEstimatorTests.cs b/test/Microsoft.ML.Tests/TensorFlowEstimatorTests.cs index 7d226bccff..ada0e85a4b 100644 --- a/test/Microsoft.ML.Tests/TensorFlowEstimatorTests.cs +++ b/test/Microsoft.ML.Tests/TensorFlowEstimatorTests.cs @@ -7,9 +7,9 @@ using System.IO; using Microsoft.ML.Core.Data; using Microsoft.ML.Data; -using Microsoft.ML.ImageAnalytics; using Microsoft.ML.Model; using Microsoft.ML.RunTests; +using Microsoft.ML.StaticPipe; using Microsoft.ML.TensorFlow.StaticPipe; using Microsoft.ML.Tools; using Microsoft.ML.Transforms; @@ -148,7 +148,7 @@ public void TestTensorFlowStatic() var dataFile = GetDataPath("images/images.tsv"); var imageFolder = Path.GetDirectoryName(dataFile); - var data = TextLoader.CreateReader(mlContext, ctx => ( + var data = TextLoaderStatic.CreateReader(mlContext, ctx => ( imagePath: ctx.LoadText(0), name: ctx.LoadText(1))) .Read(dataFile); @@ -195,7 +195,7 @@ public void TestTensorFlowStaticWithSchema() var dataFile = GetDataPath("images/images.tsv"); var imageFolder = Path.GetDirectoryName(dataFile); - var data = TextLoader.CreateReader(mlContext, ctx => ( + var data = TextLoaderStatic.CreateReader(mlContext, ctx => ( imagePath: ctx.LoadText(0), name: ctx.LoadText(1))) .Read(dataFile); diff --git a/test/Microsoft.ML.Tests/TextLoaderTests.cs b/test/Microsoft.ML.Tests/TextLoaderTests.cs index 145328beb0..cffc70a22e 100644 --- a/test/Microsoft.ML.Tests/TextLoaderTests.cs +++ b/test/Microsoft.ML.Tests/TextLoaderTests.cs @@ -6,20 +6,20 @@ using System.Collections.Generic; using System.IO; using Microsoft.ML.Data; +using Microsoft.ML.EntryPoints.JsonUtils; using Microsoft.ML.RunTests; using Microsoft.ML.TestFramework; +using Newtonsoft.Json.Linq; using Xunit; using Xunit.Abstractions; namespace Microsoft.ML.EntryPoints.Tests { -#pragma warning disable 612, 618 public sealed class TextLoaderTestPipe : TestDataPipeBase { public TextLoaderTestPipe(ITestOutputHelper output) : base(output) { - } [Fact] @@ -132,56 +132,167 @@ public void TestTextLoaderInvalidLongMax() public class TextLoaderTests : BaseTestClass { + ConsoleEnvironment env; public TextLoaderTests(ITestOutputHelper output) : base(output) { - + env = new ConsoleEnvironment(42).AddStandardComponents(); } [Fact] public void ConstructorDoesntThrow() { - Assert.NotNull(new Legacy.Data.TextLoader("fakeFile.txt").CreateFrom()); - Assert.NotNull(new Legacy.Data.TextLoader("fakeFile.txt").CreateFrom(useHeader: true)); - Assert.NotNull(new Legacy.Data.TextLoader("fakeFile.txt").CreateFrom()); - Assert.NotNull(new Legacy.Data.TextLoader("fakeFile.txt").CreateFrom(useHeader: false)); - Assert.NotNull(new Legacy.Data.TextLoader("fakeFile.txt").CreateFrom(useHeader: false, supportSparse: false, trimWhitespace: false)); - Assert.NotNull(new Legacy.Data.TextLoader("fakeFile.txt").CreateFrom(useHeader: false, supportSparse: false)); - Assert.NotNull(new Legacy.Data.TextLoader("fakeFile.txt").CreateFrom(useHeader: false, allowQuotedStrings: false)); - - 
Assert.NotNull(new Legacy.Data.TextLoader("fakeFile.txt").CreateFrom()); + var mlContext = new MLContext(seed: 1, conc: 1); + + Assert.NotNull(mlContext.Data.ReadFromTextFile("fakeFile.txt")); + Assert.NotNull(mlContext.Data.ReadFromTextFile("fakeFile.txt", hasHeader: true)); + Assert.NotNull(mlContext.Data.ReadFromTextFile("fakeFile.txt", hasHeader: false)); + Assert.NotNull(mlContext.Data.ReadFromTextFile("fakeFile.txt", hasHeader: false, supportSparse: false, trimWhitespace: false)); + Assert.NotNull(mlContext.Data.ReadFromTextFile("fakeFile.txt", hasHeader: false, supportSparse: false)); + Assert.NotNull(mlContext.Data.ReadFromTextFile("fakeFile.txt", hasHeader: false, allowQuotedStrings: false)); + Assert.NotNull(mlContext.Data.ReadFromTextFile("fakeFile.txt")); } [Fact] public void CanSuccessfullyApplyATransform() { - var loader = new Legacy.Data.TextLoader("fakeFile.txt").CreateFrom(); - - var environment = new MLContext(); - Experiment experiment = environment.CreateExperiment(); - Legacy.ILearningPipelineDataStep output = loader.ApplyStep(null, experiment) as Legacy.ILearningPipelineDataStep; - - Assert.NotNull(output.Data); - Assert.NotNull(output.Data.VarName); - Assert.Null(output.Model); + string inputGraph = @" + { + 'Nodes': + [{ + 'Name': 'Data.TextLoader', + 'Inputs': { + 'InputFile': '$inputFile', + 'Arguments': { + 'UseThreads': true, + 'HeaderFile': null, + 'MaxRows': null, + 'AllowQuoting': true, + 'AllowSparse': true, + 'InputSize': null, + 'Separator': [ + '\t' + ], + 'Column': [{ + 'Name': 'String1', + 'Type': 'TX', + 'Source': [{ + 'Min': 0, + 'Max': 0, + 'AutoEnd': false, + 'VariableEnd': false, + 'AllOther': false, + 'ForceVector': false + } + ], + 'KeyRange': null + }, { + 'Name': 'Number1', + 'Type': 'R4', + 'Source': [{ + 'Min': 1, + 'Max': 1, + 'AutoEnd': false, + 'VariableEnd': false, + 'AllOther': false, + 'ForceVector': false + } + ], + 'KeyRange': null + } + ], + 'TrimWhitespace': false, + 'HasHeader': false + } + }, + 'Outputs': { + 'Data': '$data' + } + } + ] + }"; + + JObject graph = JObject.Parse(inputGraph); + var runner = new GraphRunner(env, graph[FieldNames.Nodes] as JArray); + var inputFile = new SimpleFileHandle(env, "fakeFile.txt", false, false); + runner.SetInput("inputFile", inputFile); + runner.RunAll(); + + var data = runner.GetOutput("data"); + Assert.NotNull(data); } [Fact] public void CanSuccessfullyRetrieveQuotedData() { string dataPath = GetDataPath("QuotingData.csv"); - var loader = new Legacy.Data.TextLoader(dataPath).CreateFrom(useHeader: true, separator: ',', allowQuotedStrings: true, supportSparse: false); - - var environment = new MLContext(); - Experiment experiment = environment.CreateExperiment(); - Legacy.ILearningPipelineDataStep output = loader.ApplyStep(null, experiment) as Legacy.ILearningPipelineDataStep; - - experiment.Compile(); - loader.SetInput(environment, experiment); - experiment.Run(); - - IDataView data = experiment.GetOutput(output.Data); - Assert.NotNull(data); + string inputGraph = @" + { + 'Nodes':[ + { + 'Name':'Data.TextLoader', + 'Inputs':{ + 'InputFile':'$inputFile', + 'Arguments':{ + 'UseThreads':true, + 'HeaderFile':null, + 'MaxRows':null, + 'AllowQuoting':true, + 'AllowSparse':false, + 'InputSize':null, + 'Separator':[ + ',' + ], + 'Column':[ + { + 'Name':'ID', + 'Type':'R4', + 'Source':[ + { + 'Min':0, + 'Max':0, + 'AutoEnd':false, + 'VariableEnd':false, + 'AllOther':false, + 'ForceVector':false + } + ], + 'KeyRange':null + }, + { + 'Name':'Text', + 'Type':'TX', + 'Source':[ + { + 'Min':1, + 'Max':1, 
+ 'AutoEnd':false, + 'VariableEnd':false, + 'AllOther':false, + 'ForceVector':false + } + ], + 'KeyRange':null + } + ], + 'TrimWhitespace':false, + 'HasHeader':true + } + }, + 'Outputs':{ + 'Data':'$data' + } + } + ] + }"; + + JObject graph = JObject.Parse(inputGraph); + var runner = new GraphRunner(env, graph[FieldNames.Nodes] as JArray); + var inputFile = new SimpleFileHandle(env, dataPath, false, false); + runner.SetInput("inputFile", inputFile); + runner.RunAll(); + + var data = runner.GetOutput("data"); Assert.NotNull(data); using (var cursor = data.GetRowCursor((a => true))) { @@ -226,17 +337,108 @@ public void CanSuccessfullyRetrieveQuotedData() public void CanSuccessfullyRetrieveSparseData() { string dataPath = GetDataPath("SparseData.txt"); - var loader = new Legacy.Data.TextLoader(dataPath).CreateFrom(useHeader: true, allowQuotedStrings: false, supportSparse: true); - - var environment = new MLContext(); - Experiment experiment = environment.CreateExperiment(); - Legacy.ILearningPipelineDataStep output = loader.ApplyStep(null, experiment) as Legacy.ILearningPipelineDataStep; - - experiment.Compile(); - loader.SetInput(environment, experiment); - experiment.Run(); - - IDataView data = experiment.GetOutput(output.Data); + string inputGraph = @" + { + 'Nodes': + [{ + 'Name': 'Data.TextLoader', + 'Inputs': { + 'InputFile': '$inputFile', + 'Arguments': { + 'UseThreads': true, + 'HeaderFile': null, + 'MaxRows': null, + 'AllowQuoting': false, + 'AllowSparse': true, + 'InputSize': null, + 'Separator': [ + '\t' + ], + 'Column': [{ + 'Name': 'C1', + 'Type': 'R4', + 'Source': [{ + 'Min': 0, + 'Max': 0, + 'AutoEnd': false, + 'VariableEnd': false, + 'AllOther': false, + 'ForceVector': false + } + ], + 'KeyRange': null + }, { + 'Name': 'C2', + 'Type': 'R4', + 'Source': [{ + 'Min': 1, + 'Max': 1, + 'AutoEnd': false, + 'VariableEnd': false, + 'AllOther': false, + 'ForceVector': false + } + ], + 'KeyRange': null + }, { + 'Name': 'C3', + 'Type': 'R4', + 'Source': [{ + 'Min': 2, + 'Max': 2, + 'AutoEnd': false, + 'VariableEnd': false, + 'AllOther': false, + 'ForceVector': false + } + ], + 'KeyRange': null + }, { + 'Name': 'C4', + 'Type': 'R4', + 'Source': [{ + 'Min': 3, + 'Max': 3, + 'AutoEnd': false, + 'VariableEnd': false, + 'AllOther': false, + 'ForceVector': false + } + ], + 'KeyRange': null + }, { + 'Name': 'C5', + 'Type': 'R4', + 'Source': [{ + 'Min': 4, + 'Max': 4, + 'AutoEnd': false, + 'VariableEnd': false, + 'AllOther': false, + 'ForceVector': false + } + ], + 'KeyRange': null + } + ], + 'TrimWhitespace': false, + 'HasHeader': true + } + }, + 'Outputs': { + 'Data': '$data' + } + } + ] + }"; + + JObject graph = JObject.Parse(inputGraph); + var runner = new GraphRunner(env, graph[FieldNames.Nodes] as JArray); + var inputFile = new SimpleFileHandle(env, dataPath, false, false); + runner.SetInput("inputFile", inputFile); + runner.RunAll(); + + var data = runner.GetOutput("data"); Assert.NotNull(data); using (var cursor = data.GetRowCursor((a => true))) @@ -289,17 +491,68 @@ public void CanSuccessfullyRetrieveSparseData() public void CanSuccessfullyTrimSpaces() { string dataPath = GetDataPath("TrimData.csv"); - var loader = new Legacy.Data.TextLoader(dataPath).CreateFrom(useHeader: true, separator: ',', allowQuotedStrings: false, supportSparse: false, trimWhitespace: true); - - var environment = new MLContext(); - Experiment experiment = environment.CreateExperiment(); - Legacy.ILearningPipelineDataStep output = loader.ApplyStep(null, experiment) as Legacy.ILearningPipelineDataStep; - - 
experiment.Compile(); - loader.SetInput(environment, experiment); - experiment.Run(); - - IDataView data = experiment.GetOutput(output.Data); + string inputGraph = @"{ + 'Nodes': + [{ + 'Name': 'Data.TextLoader', + 'Inputs': { + 'InputFile': '$inputFile', + 'Arguments': { + 'UseThreads': true, + 'HeaderFile': null, + 'MaxRows': null, + 'AllowQuoting': false, + 'AllowSparse': false, + 'InputSize': null, + 'Separator': [ + ',' + ], + 'Column': [{ + 'Name': 'ID', + 'Type': 'R4', + 'Source': [{ + 'Min': 0, + 'Max': 0, + 'AutoEnd': false, + 'VariableEnd': false, + 'AllOther': false, + 'ForceVector': false + } + ], + 'KeyRange': null + }, { + 'Name': 'Text', + 'Type': 'TX', + 'Source': [{ + 'Min': 1, + 'Max': 1, + 'AutoEnd': false, + 'VariableEnd': false, + 'AllOther': false, + 'ForceVector': false + } + ], + 'KeyRange': null + } + ], + 'TrimWhitespace': true, + 'HasHeader': true + } + }, + 'Outputs': { + 'Data': '$data' + } + } + ] + }"; + + JObject graph = JObject.Parse(inputGraph); + var runner = new GraphRunner(env, graph[FieldNames.Nodes] as JArray); + var inputFile = new SimpleFileHandle(env, dataPath, false, false); + runner.SetInput("inputFile", inputFile); + runner.RunAll(); + + var data = runner.GetOutput("data"); Assert.NotNull(data); using (var cursor = data.GetRowCursor((a => true))) @@ -334,17 +587,14 @@ public void CanSuccessfullyTrimSpaces() [Fact] public void ThrowsExceptionWithPropertyName() { - Exception ex = Assert.Throws(() => new Legacy.Data.TextLoader("fakefile.txt").CreateFrom()); - Assert.StartsWith($"Field or property String1 is missing {nameof(LoadColumnAttribute)}", ex.Message); - } - - [Fact] - public void CanSuccessfullyColumnNameProperty() - { - var loader = new Legacy.Data.TextLoader("fakefile.txt").CreateFrom(); - Assert.Equal("Col1", loader.Arguments.Column[0].Name); - Assert.Equal("Col2", loader.Arguments.Column[1].Name); - Assert.Equal("String_3", loader.Arguments.Column[2].Name); + var mlContext = new MLContext(seed: 1, conc: 1); + try + { + mlContext.Data.ReadFromTextFile("fakefile.txt"); + } + // REVIEW: the issue of different exceptions being thrown is tracked under #2037. 
+ catch (Xunit.Sdk.TrueException) { } + catch (NullReferenceException) { }; } public class QuoteInput @@ -459,7 +709,7 @@ public class IrisColumnIndices public void LoaderColumnsFromIrisData() { var dataPath = GetDataPath(TestDatasets.irisData.trainFilename); - var ml = new MLContext(); + var mlContext = new MLContext(); var irisFirstRow = new Dictionary(); irisFirstRow["SepalLength"] = 5.1f; @@ -467,10 +717,10 @@ public void LoaderColumnsFromIrisData() irisFirstRow["PetalLength"] = 1.4f; irisFirstRow["PetalWidth"] = 0.2f; - var irisFirstRowValues = irisFirstRow.Values.GetEnumerator(); + var irisFirstRowValues = irisFirstRow.Values.GetEnumerator(); // Simple load - var dataIris = ml.Data.CreateTextReader(separatorChar: ',').Read(dataPath); + var dataIris = mlContext.Data.CreateTextReader(separatorChar: ',').Read(dataPath); var previewIris = dataIris.Preview(1); Assert.Equal(5, previewIris.ColumnView.Length); @@ -486,7 +736,7 @@ public void LoaderColumnsFromIrisData() Assert.Equal("Iris-setosa", previewIris.RowView[0].Values[index].Value.ToString()); // Load with start and end indexes - var dataIrisStartEnd = ml.Data.CreateTextReader(separatorChar: ',').Read(dataPath); + var dataIrisStartEnd = mlContext.Data.CreateTextReader(separatorChar: ',').Read(dataPath); var previewIrisStartEnd = dataIrisStartEnd.Preview(1); Assert.Equal(2, previewIrisStartEnd.ColumnView.Length); @@ -503,7 +753,7 @@ public void LoaderColumnsFromIrisData() } // load setting the distinct columns. Loading column 0 and 2 - var dataIrisColumnIndices = ml.Data.CreateTextReader(separatorChar: ',').Read(dataPath); + var dataIrisColumnIndices = mlContext.Data.CreateTextReader(separatorChar: ',').Read(dataPath); var previewIrisColumnIndices = dataIrisColumnIndices.Preview(1); Assert.Equal(2, previewIrisColumnIndices.ColumnView.Length); @@ -519,5 +769,4 @@ public void LoaderColumnsFromIrisData() Assert.Equal(vals4[1], irisFirstRowValues.Current); } } -#pragma warning restore 612, 618 } diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs index 30cab6c409..b70921066d 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
+using System.Linq; using Microsoft.ML.Core.Data; using Microsoft.ML.Data; using Microsoft.ML.Internal.Calibration; @@ -54,7 +55,7 @@ public void TestEstimatorPoissonRegression() } [Fact] - public void TestLogisticRegressionStats() + public void TestLogisticRegressionNoStats() { (IEstimator pipe, IDataView dataView) = GetBinaryClassificationPipeline(); @@ -70,7 +71,7 @@ public void TestLogisticRegressionStats() } [Fact] - public void TestLogisticRegressionStats_MKL() + public void TestLogisticRegressionWithStats() { (IEstimator pipe, IDataView dataView) = GetBinaryClassificationPipeline(); @@ -80,14 +81,24 @@ public void TestLogisticRegressionStats_MKL() s.StdComputer = new ComputeLRTrainingStdThroughHal(); })); - var transformerChain = pipe.Fit(dataView) as TransformerChain>; + var transformer = pipe.Fit(dataView) as TransformerChain>; - var linearModel = transformerChain.LastTransformer.Model.SubPredictor as LinearBinaryModelParameters; + var linearModel = transformer.LastTransformer.Model.SubPredictor as LinearBinaryModelParameters; var stats = linearModel.Statistics; LinearModelStatistics.TryGetBiasStatistics(stats, 2, out float stdError, out float zScore, out float pValue); CompareNumbersWithTolerance(stdError, 0.250672936); CompareNumbersWithTolerance(zScore, 7.97852373); + + var scoredData = transformer.Transform(dataView); + + var coefficients = stats.GetCoefficientStatistics(linearModel, scoredData.Schema["Features"], 100); + + Assert.Equal(19, coefficients.Length); + + foreach (var coefficient in coefficients) + Assert.True(coefficient.StandardError < 1.0); + + } } } diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/OnlineLinearTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/OnlineLinearTests.cs index 32060e4587..615712a725 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/OnlineLinearTests.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/OnlineLinearTests.cs @@ -3,7 +3,6 @@ // See the LICENSE file in the project root for more information. using Microsoft.ML; -using Microsoft.ML.Data; using Microsoft.ML.StaticPipe; using Microsoft.ML.Trainers.Online; using Xunit; @@ -17,7 +16,7 @@ public void OnlineLinearWorkout() { var dataPath = GetDataPath("breast-cancer.txt"); - var regressionData = TextLoader.CreateReader(ML, ctx => (Label: ctx.LoadFloat(0), Features: ctx.LoadFloat(1, 10))) + var regressionData = TextLoaderStatic.CreateReader(ML, ctx => (Label: ctx.LoadFloat(0), Features: ctx.LoadFloat(1, 10))) .Read(dataPath); var regressionPipe = regressionData.MakeNewEstimator() @@ -30,7 +29,7 @@ public void OnlineLinearWorkout() { var ogdModel = ogdTrainer.Fit(regressionTrainData); ogdTrainer.Train(regressionTrainData, ogdModel.Model); - var binaryData = TextLoader.CreateReader(ML, ctx => (Label: ctx.LoadBool(0), Features: ctx.LoadFloat(1, 10))) + var binaryData = TextLoaderStatic.CreateReader(ML, ctx => (Label: ctx.LoadBool(0), Features: ctx.LoadFloat(1, 10))) .Read(dataPath); var binaryPipe = binaryData.MakeNewEstimator() diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/SdcaTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/SdcaTests.cs index 99efb5f82c..f32242cedc 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/SdcaTests.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/SdcaTests.cs @@ -2,7 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information.
-using Microsoft.ML.Data; +using Microsoft.ML.StaticPipe; using Microsoft.ML.Trainers; using Xunit; @@ -15,7 +15,7 @@ public void SdcaWorkout() { var dataPath = GetDataPath("breast-cancer.txt"); - var data = TextLoader.CreateReader(Env, ctx => (Label: ctx.LoadFloat(0), Features: ctx.LoadFloat(1, 10))) + var data = TextLoaderStatic.CreateReader(Env, ctx => (Label: ctx.LoadFloat(0), Features: ctx.LoadFloat(1, 10))) .Read(dataPath).Cache(); var binaryTrainer = new SdcaBinaryTrainer(Env, "Label", "Features", advancedSettings: (s) => s.ConvergenceTolerance = 1e-2f); diff --git a/test/Microsoft.ML.Tests/Transformers/CategoricalHashTests.cs b/test/Microsoft.ML.Tests/Transformers/CategoricalHashTests.cs index 55b46c1ca1..75195c68cf 100644 --- a/test/Microsoft.ML.Tests/Transformers/CategoricalHashTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/CategoricalHashTests.cs @@ -65,7 +65,7 @@ public void CategoricalHashWorkout() public void CategoricalHashStatic() { string dataPath = GetDataPath("breast-cancer.txt"); - var reader = TextLoader.CreateReader(Env, ctx => ( + var reader = TextLoaderStatic.CreateReader(Env, ctx => ( ScalarString: ctx.LoadText(1), VectorString: ctx.LoadText(1, 4))); var data = reader.Read(dataPath); diff --git a/test/Microsoft.ML.Tests/Transformers/CategoricalTests.cs b/test/Microsoft.ML.Tests/Transformers/CategoricalTests.cs index e18cfa5dda..924ee8ba73 100644 --- a/test/Microsoft.ML.Tests/Transformers/CategoricalTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/CategoricalTests.cs @@ -82,7 +82,7 @@ public void CategoricalOneHotHashEncoding() public void CategoricalStatic() { string dataPath = GetDataPath("breast-cancer.txt"); - var reader = TextLoader.CreateReader(Env, ctx => ( + var reader = TextLoaderStatic.CreateReader(Env, ctx => ( ScalarString: ctx.LoadText(1), VectorString: ctx.LoadText(1, 4))); var data = reader.Read(dataPath); diff --git a/test/Microsoft.ML.Tests/Transformers/FeatureSelectionTests.cs b/test/Microsoft.ML.Tests/Transformers/FeatureSelectionTests.cs index e73640a2ed..b5c047b00f 100644 --- a/test/Microsoft.ML.Tests/Transformers/FeatureSelectionTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/FeatureSelectionTests.cs @@ -7,6 +7,7 @@ using Microsoft.ML.Data.IO; using Microsoft.ML.Model; using Microsoft.ML.RunTests; +using Microsoft.ML.StaticPipe; using Microsoft.ML.Tools; using Microsoft.ML.Transforms; using Microsoft.ML.Transforms.FeatureSelection; @@ -27,12 +28,12 @@ public FeatureSelectionTests(ITestOutputHelper helper) public void FeatureSelectionWorkout() { string sentimentDataPath = GetDataPath("wikipedia-detox-250-line-data.tsv"); - var data = TextLoader.CreateReader(ML, ctx => ( + var data = TextLoaderStatic.CreateReader(ML, ctx => ( label: ctx.LoadBool(0), text: ctx.LoadText(1)), hasHeader: true) .Read(sentimentDataPath); - var invalidData = TextLoader.CreateReader(ML, ctx => ( + var invalidData = TextLoaderStatic.CreateReader(ML, ctx => ( label: ctx.LoadBool(0), text: ctx.LoadFloat(1)), hasHeader: true) .Read(sentimentDataPath); @@ -61,7 +62,7 @@ public void FeatureSelectionWorkout() public void DropSlotsTransform() { string dataPath = GetDataPath("breast-cancer.txt"); - var reader = TextLoader.CreateReader(ML, ctx => ( + var reader = TextLoaderStatic.CreateReader(ML, ctx => ( ScalarFloat: ctx.LoadFloat(1), ScalarDouble: ctx.LoadDouble(1), VectorFloat: ctx.LoadFloat(1, 4), @@ -104,7 +105,7 @@ public void TestDropSlotsSelectionCommandLine() public void CountFeatureSelectionWorkout() { string dataPath = GetDataPath("breast-cancer.txt"); - var 
reader = TextLoader.CreateReader(ML, ctx => ( + var reader = TextLoaderStatic.CreateReader(ML, ctx => ( ScalarFloat: ctx.LoadFloat(6), VectorFloat: ctx.LoadFloat(1, 4), VectorDouble: ctx.LoadDouble(4, 8) @@ -147,7 +148,7 @@ public void TestCountFeatureSelectionCommandLine() public void TestCountSelectOldSavingAndLoading() { string dataPath = GetDataPath("breast-cancer.txt"); - var reader = TextLoader.CreateReader(ML, ctx => ( + var reader = TextLoaderStatic.CreateReader(ML, ctx => ( Label: ctx.LoadKey(0, 0, 2), VectorFloat: ctx.LoadFloat(1, 4) )); @@ -171,7 +172,7 @@ public void TestCountSelectOldSavingAndLoading() public void MutualInformationSelectionWorkout() { string dataPath = GetDataPath("breast-cancer.txt"); - var reader = TextLoader.CreateReader(ML, ctx => ( + var reader = TextLoaderStatic.CreateReader(ML, ctx => ( Label: ctx.LoadKey(0, 0, 2), ScalarFloat: ctx.LoadFloat(6), VectorFloat: ctx.LoadFloat(1, 4), @@ -211,7 +212,7 @@ public void TestMutualInformationFeatureSelectionCommandLine() public void TestMutualInformationOldSavingAndLoading() { string dataPath = GetDataPath("breast-cancer.txt"); - var reader = TextLoader.CreateReader(ML, ctx => ( + var reader = TextLoaderStatic.CreateReader(ML, ctx => ( Label: ctx.LoadKey(0, 0, 2), VectorFloat: ctx.LoadFloat(1, 4) )); diff --git a/test/Microsoft.ML.Tests/Transformers/KeyToBinaryVectorEstimatorTest.cs b/test/Microsoft.ML.Tests/Transformers/KeyToBinaryVectorEstimatorTest.cs index 8ebfc775de..5c9d416d2f 100644 --- a/test/Microsoft.ML.Tests/Transformers/KeyToBinaryVectorEstimatorTest.cs +++ b/test/Microsoft.ML.Tests/Transformers/KeyToBinaryVectorEstimatorTest.cs @@ -61,7 +61,7 @@ public void KeyToBinaryVectorWorkout() public void KeyToBinaryVectorStatic() { string dataPath = GetDataPath("breast-cancer.txt"); - var reader = TextLoader.CreateReader(Env, ctx => ( + var reader = TextLoaderStatic.CreateReader(Env, ctx => ( ScalarString: ctx.LoadText(1), VectorString: ctx.LoadText(1, 4) )); diff --git a/test/Microsoft.ML.Tests/Transformers/KeyToValueTests.cs b/test/Microsoft.ML.Tests/Transformers/KeyToValueTests.cs index b89584a2cd..9331c73b0e 100644 --- a/test/Microsoft.ML.Tests/Transformers/KeyToValueTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/KeyToValueTests.cs @@ -72,7 +72,7 @@ public void KeyToValueWorkout() public void KeyToValuePigsty() { string dataPath = GetDataPath("breast-cancer.txt"); - var reader = TextLoader.CreateReader(Env, ctx => ( + var reader = TextLoaderStatic.CreateReader(Env, ctx => ( ScalarString: ctx.LoadText(1), VectorString: ctx.LoadText(1, 4) )); diff --git a/test/Microsoft.ML.Tests/Transformers/KeyToVectorEstimatorTests.cs b/test/Microsoft.ML.Tests/Transformers/KeyToVectorEstimatorTests.cs index 29082202f5..967231f604 100644 --- a/test/Microsoft.ML.Tests/Transformers/KeyToVectorEstimatorTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/KeyToVectorEstimatorTests.cs @@ -69,7 +69,7 @@ public void KeyToVectorWorkout() public void KeyToVectorStatic() { string dataPath = GetDataPath("breast-cancer.txt"); - var reader = TextLoader.CreateReader(Env, ctx => ( + var reader = TextLoaderStatic.CreateReader(Env, ctx => ( ScalarString: ctx.LoadText(1), VectorString: ctx.LoadText(1, 4) )); diff --git a/test/Microsoft.ML.Tests/Transformers/NAIndicatorTests.cs b/test/Microsoft.ML.Tests/Transformers/NAIndicatorTests.cs index a3567ab1a3..c1ac8d7bae 100644 --- a/test/Microsoft.ML.Tests/Transformers/NAIndicatorTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/NAIndicatorTests.cs @@ -8,6 +8,7 @@ using Microsoft.ML.Data.IO; 
using Microsoft.ML.Model; using Microsoft.ML.RunTests; +using Microsoft.ML.StaticPipe; using Microsoft.ML.Tools; using Microsoft.ML.Transforms; using Microsoft.ML.Transforms.Categorical; @@ -84,7 +85,7 @@ public void TestOldSavingAndLoading() public void NAIndicatorFileOutput() { string dataPath = GetDataPath("breast-cancer.txt"); - var reader = TextLoader.CreateReader(Env, ctx => ( + var reader = TextLoaderStatic.CreateReader(Env, ctx => ( ScalarFloat: ctx.LoadFloat(1), ScalarDouble: ctx.LoadDouble(1), VectorFloat: ctx.LoadFloat(1, 4), diff --git a/test/Microsoft.ML.Tests/Transformers/NAReplaceTests.cs b/test/Microsoft.ML.Tests/Transformers/NAReplaceTests.cs index a7a058756f..888ba4057a 100644 --- a/test/Microsoft.ML.Tests/Transformers/NAReplaceTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/NAReplaceTests.cs @@ -56,7 +56,7 @@ public void NAReplaceWorkout() public void NAReplaceStatic() { string dataPath = GetDataPath("breast-cancer.txt"); - var reader = TextLoader.CreateReader(Env, ctx => ( + var reader = TextLoaderStatic.CreateReader(Env, ctx => ( ScalarFloat: ctx.LoadFloat(1), ScalarDouble: ctx.LoadDouble(1), VectorFloat: ctx.LoadFloat(1, 4), diff --git a/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs b/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs index 828b8c361a..c88f1183b5 100644 --- a/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs @@ -8,6 +8,7 @@ using Microsoft.ML.Data.IO; using Microsoft.ML.Model; using Microsoft.ML.RunTests; +using Microsoft.ML.StaticPipe; using Microsoft.ML.Tools; using Microsoft.ML.Transforms; using Microsoft.ML.Transforms.Normalizers; @@ -260,12 +261,12 @@ public void SimpleConstructorsAndExtensions() public void LpGcNormAndWhiteningWorkout() { string dataSource = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename); - var data = TextLoader.CreateReader(ML, + var data = TextLoaderStatic.CreateReader(ML, c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)), separator: ';', hasHeader: true) .Read(dataSource); - var invalidData = TextLoader.CreateReader(ML, + var invalidData = TextLoaderStatic.CreateReader(ML, c => (label: c.LoadFloat(11), features: c.LoadText(0, 10)), separator: ';', hasHeader: true) .Read(dataSource); @@ -294,12 +295,12 @@ public void LpGcNormAndWhiteningWorkout() public void WhiteningWorkout() { string dataSource = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename); - var data = TextLoader.CreateReader(ML, + var data = TextLoaderStatic.CreateReader(ML, c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)), separator: ';', hasHeader: true) .Read(dataSource); - var invalidData = TextLoader.CreateReader(ML, + var invalidData = TextLoaderStatic.CreateReader(ML, c => (label: c.LoadFloat(11), features: c.LoadText(0, 10)), separator: ';', hasHeader: true) .Read(dataSource); @@ -333,7 +334,7 @@ public void TestWhiteningCommandLine() public void TestWhiteningOldSavingAndLoading() { string dataSource = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename); - var dataView = TextLoader.CreateReader(ML, + var dataView = TextLoaderStatic.CreateReader(ML, c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)), separator: ';', hasHeader: true) .Read(dataSource).AsDynamic; @@ -354,12 +355,12 @@ public void TestWhiteningOldSavingAndLoading() public void LpNormWorkout() { string dataSource = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename); - var data = TextLoader.CreateReader(ML, + var 
data = TextLoaderStatic.CreateReader(ML, c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)), separator: ';', hasHeader: true) .Read(dataSource); - var invalidData = TextLoader.CreateReader(ML, + var invalidData = TextLoaderStatic.CreateReader(ML, c => (label: c.LoadFloat(11), features: c.LoadText(0, 10)), separator: ';', hasHeader: true) .Read(dataSource); @@ -393,7 +394,7 @@ public void TestLpNormCommandLine() public void TestLpNormOldSavingAndLoading() { string dataSource = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename); - var dataView = TextLoader.CreateReader(ML, + var dataView = TextLoaderStatic.CreateReader(ML, c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)), separator: ';', hasHeader: true) .Read(dataSource).AsDynamic; @@ -413,12 +414,12 @@ public void TestLpNormOldSavingAndLoading() public void GcnWorkout() { string dataSource = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename); - var data = TextLoader.CreateReader(ML, + var data = TextLoaderStatic.CreateReader(ML, c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)), separator: ';', hasHeader: true) .Read(dataSource); - var invalidData = TextLoader.CreateReader(ML, + var invalidData = TextLoaderStatic.CreateReader(ML, c => (label: c.LoadFloat(11), features: c.LoadText(0, 10)), separator: ';', hasHeader: true) .Read(dataSource); @@ -452,7 +453,7 @@ public void TestGcnNormCommandLine() public void TestGcnNormOldSavingAndLoading() { string dataSource = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename); - var dataView = TextLoader.CreateReader(ML, + var dataView = TextLoaderStatic.CreateReader(ML, c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)), separator: ';', hasHeader: true) .Read(dataSource).AsDynamic; diff --git a/test/Microsoft.ML.Tests/Transformers/PcaTests.cs b/test/Microsoft.ML.Tests/Transformers/PcaTests.cs index 94a2c462eb..f1fb2da4f8 100644 --- a/test/Microsoft.ML.Tests/Transformers/PcaTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/PcaTests.cs @@ -6,6 +6,7 @@ using Microsoft.ML.Data; using Microsoft.ML.Data.IO; using Microsoft.ML.RunTests; +using Microsoft.ML.StaticPipe; using Microsoft.ML.Transforms; using Microsoft.ML.Transforms.Projections; using Xunit; @@ -30,12 +31,12 @@ public PcaTests(ITestOutputHelper helper) [Fact] public void PcaWorkout() { - var data = TextLoader.CreateReader(_env, + var data = TextLoaderStatic.CreateReader(_env, c => (label: c.LoadFloat(11), weight: c.LoadFloat(0), features: c.LoadFloat(1, 10)), separator: ';', hasHeader: true) .Read(_dataSource); - var invalidData = TextLoader.CreateReader(_env, + var invalidData = TextLoaderStatic.CreateReader(_env, c => (label: c.LoadFloat(11), weight: c.LoadFloat(0), features: c.LoadText(1, 10)), separator: ';', hasHeader: true) .Read(_dataSource); @@ -52,7 +53,7 @@ public void PcaWorkout() [Fact] public void TestPcaEstimator() { - var data = TextLoader.CreateReader(_env, + var data = TextLoaderStatic.CreateReader(_env, c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)), separator: ';', hasHeader: true) .Read(_dataSource); diff --git a/test/Microsoft.ML.Tests/Transformers/RffTests.cs b/test/Microsoft.ML.Tests/Transformers/RffTests.cs index ac65762246..3bdfa50903 100644 --- a/test/Microsoft.ML.Tests/Transformers/RffTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/RffTests.cs @@ -1,10 +1,15 @@ -using System; +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+// See the LICENSE file in the project root for more information. + +using System; using System.IO; using System.Linq; using Microsoft.ML.Data; using Microsoft.ML.Data.IO; using Microsoft.ML.Model; using Microsoft.ML.RunTests; +using Microsoft.ML.StaticPipe; using Microsoft.ML.Tools; using Microsoft.ML.Transforms; using Microsoft.ML.Transforms.Projections; @@ -62,7 +67,7 @@ public void RffWorkout() public void RffStatic() { string dataPath = GetDataPath("breast-cancer.txt"); - var reader = TextLoader.CreateReader(Env, ctx => ( + var reader = TextLoaderStatic.CreateReader(Env, ctx => ( VectorFloat: ctx.LoadFloat(1, 8), Label: ctx.LoadFloat(0) )); diff --git a/test/Microsoft.ML.Tests/Transformers/TextFeaturizerTests.cs b/test/Microsoft.ML.Tests/Transformers/TextFeaturizerTests.cs index 0c33dc549b..432bbb8291 100644 --- a/test/Microsoft.ML.Tests/Transformers/TextFeaturizerTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/TextFeaturizerTests.cs @@ -30,12 +30,12 @@ public TextFeaturizerTests(ITestOutputHelper helper) public void TextFeaturizerWorkout() { string sentimentDataPath = GetDataPath("wikipedia-detox-250-line-data.tsv"); - var data = TextLoader.CreateReader(Env, ctx => ( + var data = TextLoaderStatic.CreateReader(Env, ctx => ( label: ctx.LoadBool(0), text: ctx.LoadText(1)), hasHeader: true) .Read(sentimentDataPath); - var invalidData = TextLoader.CreateReader(Env, ctx => ( + var invalidData = TextLoaderStatic.CreateReader(Env, ctx => ( label: ctx.LoadBool(0), text: ctx.LoadFloat(1)), hasHeader: true) .Read(sentimentDataPath) @@ -65,12 +65,12 @@ public void TextFeaturizerWorkout() public void TextTokenizationWorkout() { string sentimentDataPath = GetDataPath("wikipedia-detox-250-line-data.tsv"); - var data = TextLoader.CreateReader(Env, ctx => ( + var data = TextLoaderStatic.CreateReader(Env, ctx => ( label: ctx.LoadBool(0), text: ctx.LoadText(1)), hasHeader: true) .Read(sentimentDataPath); - var invalidData = TextLoader.CreateReader(Env, ctx => ( + var invalidData = TextLoaderStatic.CreateReader(Env, ctx => ( label: ctx.LoadBool(0), text: ctx.LoadFloat(1)), hasHeader: true) .Read(sentimentDataPath); @@ -99,7 +99,7 @@ public void TextTokenizationWorkout() public void TokenizeWithSeparators() { string dataPath = GetDataPath("wikipedia-detox-250-line-data.tsv"); - var data = TextLoader.CreateReader(Env, ctx => ( + var data = TextLoaderStatic.CreateReader(Env, ctx => ( label: ctx.LoadBool(0), text: ctx.LoadText(1)), hasHeader: true) .Read(dataPath).AsDynamic; @@ -136,12 +136,12 @@ public void TokenizeWithSeparatorCommandLine() public void TextNormalizationAndStopwordRemoverWorkout() { string sentimentDataPath = GetDataPath("wikipedia-detox-250-line-data.tsv"); - var data = TextLoader.CreateReader(Env, ctx => ( + var data = TextLoaderStatic.CreateReader(Env, ctx => ( label: ctx.LoadBool(0), text: ctx.LoadText(1)), hasHeader: true) .Read(sentimentDataPath); - var invalidData = TextLoader.CreateReader(Env, ctx => ( + var invalidData = TextLoaderStatic.CreateReader(Env, ctx => ( label: ctx.LoadBool(0), text: ctx.LoadFloat(1)), hasHeader: true) .Read(sentimentDataPath); @@ -202,12 +202,12 @@ public void StopWordsRemoverFromFactory() public void WordBagWorkout() { string sentimentDataPath = GetDataPath("wikipedia-detox-250-line-data.tsv"); - var data = TextLoader.CreateReader(Env, ctx => ( + var data = TextLoaderStatic.CreateReader(Env, ctx => ( label: ctx.LoadBool(0), text: ctx.LoadText(1)), hasHeader: true) .Read(sentimentDataPath); - var invalidData = TextLoader.CreateReader(Env, ctx => ( 
+ var invalidData = TextLoaderStatic.CreateReader(Env, ctx => ( label: ctx.LoadBool(0), text: ctx.LoadFloat(1)), hasHeader: true) .Read(sentimentDataPath); @@ -238,12 +238,12 @@ public void WordBagWorkout() public void NgramWorkout() { string sentimentDataPath = GetDataPath("wikipedia-detox-250-line-data.tsv"); - var data = TextLoader.CreateReader(Env, ctx => ( + var data = TextLoaderStatic.CreateReader(Env, ctx => ( label: ctx.LoadBool(0), text: ctx.LoadText(1)), hasHeader: true) .Read(sentimentDataPath); - var invalidData = TextLoader.CreateReader(Env, ctx => ( + var invalidData = TextLoaderStatic.CreateReader(Env, ctx => ( label: ctx.LoadBool(0), text: ctx.LoadFloat(1)), hasHeader: true) .Read(sentimentDataPath); @@ -275,7 +275,7 @@ void TestNgramCompatColumns() { string dropModelPath = GetDataPath("backcompat/ngram.zip"); string sentimentDataPath = GetDataPath("wikipedia-detox-250-line-data.tsv"); - var data = TextLoader.CreateReader(ML, ctx => ( + var data = TextLoaderStatic.CreateReader(ML, ctx => ( Sentiment: ctx.LoadBool(0), SentimentText: ctx.LoadText(1)), hasHeader: true) .Read(sentimentDataPath); @@ -292,12 +292,12 @@ public void LdaWorkout() { IHostEnvironment env = new MLContext(seed: 42, conc: 1); string sentimentDataPath = GetDataPath("wikipedia-detox-250-line-data.tsv"); - var data = TextLoader.CreateReader(env, ctx => ( + var data = TextLoaderStatic.CreateReader(env, ctx => ( label: ctx.LoadBool(0), text: ctx.LoadText(1)), hasHeader: true) .Read(sentimentDataPath); - var invalidData = TextLoader.CreateReader(env, ctx => ( + var invalidData = TextLoaderStatic.CreateReader(env, ctx => ( label: ctx.LoadBool(0), text: ctx.LoadFloat(1)), hasHeader: true) .Read(sentimentDataPath); diff --git a/test/Microsoft.ML.Tests/Transformers/TextNormalizer.cs b/test/Microsoft.ML.Tests/Transformers/TextNormalizer.cs index dc1f76f4b2..415dd7f1ca 100644 --- a/test/Microsoft.ML.Tests/Transformers/TextNormalizer.cs +++ b/test/Microsoft.ML.Tests/Transformers/TextNormalizer.cs @@ -7,6 +7,7 @@ using Microsoft.ML.Data.IO; using Microsoft.ML.Model; using Microsoft.ML.RunTests; +using Microsoft.ML.StaticPipe; using Microsoft.ML.Tools; using Microsoft.ML.Transforms; using Microsoft.ML.Transforms.Text; @@ -50,7 +51,7 @@ public void TextNormalizerWorkout() TestEstimatorCore(pipe, dataView, invalidInput: invalidDataView); var dataPath = GetDataPath("wikipedia-detox-250-line-data.tsv"); - var reader = TextLoader.CreateReader(Env, ctx => ( + var reader = TextLoaderStatic.CreateReader(Env, ctx => ( label: ctx.LoadBool(0), text: ctx.LoadText(1)), hasHeader: true); var dataSource = new MultiFileSource(dataPath); diff --git a/test/Microsoft.ML.Tests/Transformers/WordEmbeddingsTests.cs b/test/Microsoft.ML.Tests/Transformers/WordEmbeddingsTests.cs index 56b3c9105a..036030a166 100644 --- a/test/Microsoft.ML.Tests/Transformers/WordEmbeddingsTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/WordEmbeddingsTests.cs @@ -22,10 +22,10 @@ public WordEmbeddingsTests(ITestOutputHelper helper) [Fact] public void TestWordEmbeddings() { - var dataPath = GetDataPath(ScenariosTests.SentimentDataPath); - var testDataPath = GetDataPath(ScenariosTests.SentimentTestPath); + var dataPath = GetDataPath(TestDatasets.Sentiment.trainFilename); + var testDataPath = GetDataPath(TestDatasets.Sentiment.testFilename); - var data = TextLoader.CreateReader(Env, ctx => ( + var data = TextLoaderStatic.CreateReader(Env, ctx => ( label: ctx.LoadBool(0), SentimentText: ctx.LoadText(1)), hasHeader: true) .Read(dataPath);