Skip to content

Commit

Permalink
Merge pull request #1094 from shauheen/cp06
Browse files Browse the repository at this point in the history
Cherrypick to update release for 0.6
  • Loading branch information
shauheen authored Sep 28, 2018
2 parents 2469e3c + bea3e61 commit 59ebda7
Show file tree
Hide file tree
Showing 50 changed files with 1,632 additions and 348 deletions.
2 changes: 1 addition & 1 deletion build/Dependencies.props
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
<MicrosoftCodeAnalysisCSharpVersion>2.9.0</MicrosoftCodeAnalysisCSharpVersion>
<MicrosoftCSharpVersion>4.5.0</MicrosoftCSharpVersion>
<SystemCompositionVersion>1.2.0</SystemCompositionVersion>
<MicrosoftMLScoring>1.0.4-dev48825</MicrosoftMLScoring>
<MicrosoftMLScoring>1.1.0</MicrosoftMLScoring>
<SystemIOFileSystemAccessControl>4.5.0</SystemIOFileSystemAccessControl>
<SystemSecurityPrincipalWindows>4.5.0</SystemSecurityPrincipalWindows>
</PropertyGroup>
Expand Down
28 changes: 12 additions & 16 deletions src/Common/AssemblyLoadingUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,12 @@ private static bool ShouldSkipPath(string path)
string name = Path.GetFileName(path).ToLowerInvariant();
switch (name)
{
case "cpumathnative.dll":
case "cqo.dll":
case "fasttreenative.dll":
case "factorizationmachinenative.dll":
case "libiomp5md.dll":
case "ldanative.dll":
case "libvw.dll":
case "matrixinterf.dll":
case "microsoft.ml.neuralnetworks.gpucuda.dll":
Expand All @@ -154,6 +157,7 @@ private static bool ShouldSkipPath(string path)
case "parallelcommunicator.dll":
case "microsoft.ml.runtime.runtests.dll":
case "scopecompiler.dll":
case "symsgdnative.dll":
case "tbb.dll":
case "internallearnscope.dll":
case "unmanagedlib.dll":
Expand All @@ -179,20 +183,16 @@ private static void LoadAssembliesInDir(IHostEnvironment env, string dir, bool f
if (!Directory.Exists(dir))
return;

using (var ch = env.Start("LoadAssembliesInDir"))
// Load all dlls in the given directory.
var paths = Directory.EnumerateFiles(dir, "*.dll");
foreach (string path in paths)
{
// Load all dlls in the given directory.
var paths = Directory.EnumerateFiles(dir, "*.dll");
foreach (string path in paths)
if (filter && ShouldSkipPath(path))
{
if (filter && ShouldSkipPath(path))
{
ch.Info($"Skipping assembly '{path}' because its name was filtered.");
continue;
}

LoadAssembly(env, path);
continue;
}

LoadAssembly(env, path);
}
}

Expand All @@ -206,12 +206,8 @@ private static Assembly LoadAssembly(IHostEnvironment env, string path)
{
assembly = Assembly.LoadFrom(path);
}
catch (Exception e)
catch (Exception)
{
using (var ch = env.Start("LoadAssembly"))
{
ch.Error("Could not load assembly {0}:\n{1}", path, e.ToString());
}
return null;
}

Expand Down
9 changes: 8 additions & 1 deletion src/Microsoft.ML.Core/Utilities/Contracts.cs
Original file line number Diff line number Diff line change
Expand Up @@ -570,7 +570,14 @@ public static T CheckRef<T>(this IExceptionContext ctx, T val, string paramName)
return val;
}

public static void CheckValue<T>(T val, string paramName) where T : class
/// <summary>
/// Returns <paramref name="val"/> unchanged when it is a non-null reference; otherwise throws the
/// exception produced by <c>ExceptValue</c> for <paramref name="ctx"/>, <paramref name="paramName"/>
/// and <paramref name="msg"/>.
/// </summary>
/// <param name="ctx">The exception context handed to <c>ExceptValue</c>.</param>
/// <param name="val">The reference to validate.</param>
/// <param name="paramName">The name of the parameter being validated.</param>
/// <param name="msg">The message handed to <c>ExceptValue</c> when <paramref name="val"/> is null.</param>
/// <returns><paramref name="val"/>, which is not null when this method returns.</returns>
public static T CheckRef<T>(this IExceptionContext ctx, T val, string paramName, string msg) where T : class
{
    // T is constrained to a class, so ?? performs a plain reference null test here.
    return val ?? throw ExceptValue(ctx, paramName, msg);
}

public static void CheckValue<T>(T val, string paramName) where T : class
{
if (object.ReferenceEquals(val, null))
throw ExceptValue(paramName);
Expand Down
13 changes: 13 additions & 0 deletions src/Microsoft.ML.Data/Data/RowCursorUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,19 @@ public static IRow CloneRow(IRow row)
Utils.BuildArray(row.Schema.ColumnCount, c => RowColumnUtils.GetColumn(row, c)));
}

/// <summary>
/// Fetches the value of the column by name, in the given row.
/// Used by the evaluators to retrieve the metrics from the results IDataView.
/// </summary>
/// <typeparam name="T">The type used to request the column's getter from the row.</typeparam>
/// <param name="ectx">The exception context used to create the exception when the column is not found.</param>
/// <param name="row">The row whose schema is searched and from which the value is read.</param>
/// <param name="name">The name of the column to read.</param>
/// <returns>The value the column's getter produced for <paramref name="row"/>.</returns>
public static T Fetch<T>(IExceptionContext ectx, IRow row, string name)
{
    if (row.Schema.TryGetColumnIndex(name, out int columnIndex))
    {
        var getter = row.GetGetter<T>(columnIndex);
        T value = default;
        getter(ref value);
        return value;
    }

    // The schema has no column with the requested name.
    throw ectx.Except($"Could not find column '{name}'");
}

/// <summary>
/// Given a row, returns a one-row data view. This is useful for cases where you have a row, and you
/// wish to use some facility normally only exposed to dataviews. (E.g., you have an <see cref="IRow"/>
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Data/DataLoadSave/EstimatorChain.cs
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ public EstimatorChain<TNewTrans> Append<TNewTrans>(IEstimator<TNewTrans> estimat
where TNewTrans : class, ITransformer
{
Contracts.CheckValue(estimator, nameof(estimator));
return new EstimatorChain<TNewTrans>(_estimators.Append(estimator).ToArray(), _scopes.Append(scope).ToArray());
return new EstimatorChain<TNewTrans>(_estimators.AppendElement(estimator), _scopes.AppendElement(scope));
}
}
}
9 changes: 9 additions & 0 deletions src/Microsoft.ML.Data/DataLoadSave/EstimatorExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

using System;
using Microsoft.ML.Core.Data;
using Microsoft.ML.Runtime.Internal.Utilities;

namespace Microsoft.ML.Runtime.Data
{
Expand Down Expand Up @@ -123,5 +124,13 @@ public static IEstimator<TTransformer> WithOnFitDelegate<TTransformer>(this IEst
Contracts.CheckValue(onFit, nameof(onFit));
return new DelegateEstimator<TTransformer>(estimator, onFit);
}

/// <summary>
/// Creates a new array containing the elements of <paramref name="array"/> followed by
/// <paramref name="element"/>. The input array itself is not modified.
/// </summary>
/// <typeparam name="T">The element type of the array.</typeparam>
/// <param name="array">The array whose elements come first in the result.</param>
/// <param name="element">The element placed in the last slot of the result.</param>
/// <returns>A newly allocated array one element longer than the size reported for <paramref name="array"/>.</returns>
internal static T[] AppendElement<T>(this T[] array, T element)
{
    // NOTE(review): presumably Utils.Size reports 0 for a null array - confirm against Utils.
    int count = Utils.Size(array);
    T[] result = new T[count + 1];

    // Array.Copy throws ArgumentNullException for a null source even when asked to copy
    // zero elements, so the copy is skipped whenever there is nothing to copy.
    if (count > 0)
        Array.Copy(array, result, count);

    result[count] = element;
    return result;
}
}
}
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Data/DataLoadSave/TransformerChain.cs
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ public TransformerChain<TNewLast> Append<TNewLast>(TNewLast transformer, Transfo
where TNewLast : class, ITransformer
{
Contracts.CheckValue(transformer, nameof(transformer));
return new TransformerChain<TNewLast>(_transformers.Append(transformer).ToArray(), _scopes.Append(scope).ToArray());
return new TransformerChain<TNewLast>(_transformers.AppendElement(transformer), _scopes.AppendElement(scope));
}

public void Save(ModelSaveContext ctx)
Expand Down
80 changes: 80 additions & 0 deletions src/Microsoft.ML.Data/Evaluators/ClusteringEvaluator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,46 @@ public ClusteringEvaluator(IHostEnvironment env, Arguments args)
_calculateDbi = args.CalculateDbi;
}

/// <summary>
/// Evaluates scored clustering data.
/// </summary>
/// <param name="data">The scored data.</param>
/// <param name="score">The name of the score column in <paramref name="data"/>.</param>
/// <param name="label">The name of the optional label column in <paramref name="data"/>.</param>
/// <param name="features">The name of the optional feature column in <paramref name="data"/>.</param>
/// <returns>The evaluation results.</returns>
public Result Evaluate(IDataView data, string score, string label = null, string features = null)
{
    Host.CheckValue(data, nameof(data));
    Host.CheckNonEmpty(score, nameof(score));

    // The score role is always bound; label and feature roles are bound only when a name was supplied.
    var columnRoles = new List<KeyValuePair<RoleMappedSchema.ColumnRole, string>>
    {
        RoleMappedSchema.CreatePair(MetadataUtils.Const.ScoreValueKind.Score, score)
    };

    if (label != null)
    {
        columnRoles.Add(RoleMappedSchema.ColumnRole.Label.Bind(label));
    }

    if (features != null)
    {
        columnRoles.Add(RoleMappedSchema.ColumnRole.Feature.Bind(features));
    }

    var mappedData = new RoleMappedData(data, opt: false, columnRoles.ToArray());

    var metricViews = Evaluate(mappedData);
    Host.Assert(metricViews.ContainsKey(MetricKinds.OverallMetrics));
    var overallMetrics = metricViews[MetricKinds.OverallMetrics];

    using (var cursor = overallMetrics.GetRowCursor(i => true))
    {
        // The overall metrics view is expected to hold exactly one row: advance onto it,
        // read the metrics, then confirm that no second row follows.
        bool advanced = cursor.MoveNext();
        Host.Assert(advanced);
        var metrics = new Result(Host, cursor, _calculateDbi);
        advanced = cursor.MoveNext();
        Host.Assert(!advanced);
        return metrics;
    }
}

protected override void CheckScoreAndLabelTypes(RoleMappedSchema schema)
{
ColumnType type;
Expand Down Expand Up @@ -517,6 +557,46 @@ private void AssertValid(bool assertGetters)
}
}
}

/// <summary>
/// The metrics generated after evaluating the clustering predictions.
/// </summary>
public sealed class Result
{
    /// <summary>
    /// Normalized Mutual Information.
    /// NMI is a measure of the mutual dependence of the variables.
    /// <a href="http://en.wikipedia.org/wiki/Mutual_information#Normalized_variants">Normalized variants</a> work on data that already has cluster labels.
    /// Its value ranges from 0 to 1, where higher numbers are better.
    /// </summary>
    public double Nmi { get; }

    /// <summary>
    /// Average Score. For the K-Means algorithm, the 'score' is the distance from the centroid to the example.
    /// The average score is, therefore, a measure of proximity of the examples to cluster centroids.
    /// In other words, it's the 'cluster tightness' measure.
    /// Note however, that this metric will only decrease if the number of clusters is increased,
    /// and in the extreme case (where each distinct example is its own cluster) it will be equal to zero.
    /// </summary>
    public double AvgMinScore { get; }

    /// <summary>
    /// <a href="https://en.wikipedia.org/wiki/Davies–Bouldin_index">Davies-Bouldin Index</a>.
    /// DBI is a measure of how much scatter is in the cluster and the cluster separation.
    /// This property is only assigned when the result is built with DBI calculation requested;
    /// otherwise it keeps its default value.
    /// </summary>
    public double Dbi { get; }

    /// <summary>
    /// Reads the clustering metrics out of the overall-results row.
    /// </summary>
    /// <param name="ectx">The exception context passed along when fetching each metric column.</param>
    /// <param name="overallResult">The row holding the overall metric columns.</param>
    /// <param name="calculateDbi">Whether the DBI column should be read from <paramref name="overallResult"/>.</param>
    internal Result(IExceptionContext ectx, IRow overallResult, bool calculateDbi)
    {
        double FetchMetric(string columnName)
        {
            return RowCursorUtils.Fetch<double>(ectx, overallResult, columnName);
        }

        Nmi = FetchMetric(ClusteringEvaluator.Nmi);
        AvgMinScore = FetchMetric(ClusteringEvaluator.AvgMinScore);

        // The DBI column is read only on request; otherwise Dbi is left unassigned.
        if (calculateDbi)
        {
            Dbi = FetchMetric(ClusteringEvaluator.Dbi);
        }
    }
}
}

public sealed class ClusteringPerInstanceEvaluator : PerInstanceEvaluatorBase
Expand Down
35 changes: 34 additions & 1 deletion src/Microsoft.ML.Data/Evaluators/EvaluatorStaticExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,39 @@ public static BinaryClassifierEvaluator.Result Evaluate<T>(
return eval.Evaluate(data.AsDynamic, labelName, scoreName, predName);
}

/// <summary>
/// Evaluates scored clustering prediction data.
/// </summary>
/// <typeparam name="T">The shape type for the input data.</typeparam>
/// <param name="ctx">The clustering context.</param>
/// <param name="data">The data to evaluate.</param>
/// <param name="score">The index delegate for the predicted score column.</param>
/// <param name="label">The optional index delegate for the label column.</param>
/// <param name="features">The optional index delegate for the features column.
/// DBI calculation is requested only when this resolves to a non-empty column name.</param>
/// <returns>The evaluation metrics.</returns>
public static ClusteringEvaluator.Result Evaluate<T>(
    this ClusteringContext ctx,
    DataView<T> data,
    Func<T, Vector<float>> score,
    Func<T, Key<uint>> label = null,
    Func<T, Vector<float>> features = null)
{
    Contracts.CheckValue(data, nameof(data));
    var env = StaticPipeUtils.GetEnvironment(data);
    Contracts.AssertValue(env);
    env.CheckValue(score, nameof(score));

    // Resolve each index delegate to the name of the column it selects.
    var indexer = StaticPipeUtils.GetIndexer(data);
    string scoreName = indexer.Get(score(indexer.Indices));

    string labelName = null;
    if (label != null)
    {
        labelName = indexer.Get(label(indexer.Indices));
    }

    string featuresName = null;
    if (features != null)
    {
        featuresName = indexer.Get(features(indexer.Indices));
    }

    bool hasFeatures = !string.IsNullOrEmpty(featuresName);
    var evaluatorArgs = new ClusteringEvaluator.Arguments() { CalculateDbi = hasFeatures };
    var evaluator = new ClusteringEvaluator(env, evaluatorArgs);

    return evaluator.Evaluate(data.AsDynamic, scoreName, labelName, featuresName);
}

/// <summary>
/// Evaluates scored multiclass classification data.
/// </summary>
Expand Down Expand Up @@ -136,7 +169,7 @@ private sealed class TrivialRegressionLossFactory : ISupportRegressionLossFactor
}

/// <summary>
/// Evaluates scored multiclass classification data.
/// Evaluates scored regression data.
/// </summary>
/// <typeparam name="T">The shape type for the input data.</typeparam>
/// <param name="ctx">The regression context.</param>
Expand Down
22 changes: 7 additions & 15 deletions src/Microsoft.ML.Data/Evaluators/MulticlassClassifierEvaluator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -570,26 +570,18 @@ public sealed class Result
/// </remarks>
public double[] PerClassLogLoss { get; }

private static T Fetch<T>(IExceptionContext ectx, IRow row, string name)
{
if (!row.Schema.TryGetColumnIndex(name, out int col))
throw ectx.Except($"Could not find column '{name}'");
T val = default;
row.GetGetter<T>(col)(ref val);
return val;
}
internal Result(IExceptionContext ectx, IRow overallResult, int topK)
{
double Fetch(string name) => Fetch<double>(ectx, overallResult, name);
AccuracyMicro = Fetch(MultiClassClassifierEvaluator.AccuracyMicro);
AccuracyMacro = Fetch(MultiClassClassifierEvaluator.AccuracyMacro);
LogLoss = Fetch(MultiClassClassifierEvaluator.LogLoss);
LogLossReduction = Fetch(MultiClassClassifierEvaluator.LogLossReduction);
double FetchDouble(string name) => RowCursorUtils.Fetch<double>(ectx, overallResult, name);
AccuracyMicro = FetchDouble(MultiClassClassifierEvaluator.AccuracyMicro);
AccuracyMacro = FetchDouble(MultiClassClassifierEvaluator.AccuracyMacro);
LogLoss = FetchDouble(MultiClassClassifierEvaluator.LogLoss);
LogLossReduction = FetchDouble(MultiClassClassifierEvaluator.LogLossReduction);
TopK = topK;
if (topK > 0)
TopKAccuracy = Fetch(MultiClassClassifierEvaluator.TopKAccuracy);
TopKAccuracy = FetchDouble(MultiClassClassifierEvaluator.TopKAccuracy);

var perClassLogLoss = Fetch<VBuffer<double>>(ectx, overallResult, MultiClassClassifierEvaluator.PerClassLogLoss);
var perClassLogLoss = RowCursorUtils.Fetch<VBuffer<double>>(ectx, overallResult, MultiClassClassifierEvaluator.PerClassLogLoss);
PerClassLogLoss = new double[perClassLogLoss.Length];
perClassLogLoss.CopyTo(PerClassLogLoss);
}
Expand Down
16 changes: 2 additions & 14 deletions src/Microsoft.ML.Data/Evaluators/RegressionEvaluator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -203,18 +203,9 @@ public sealed class Result
/// </summary>
public double RSquared { get; }

private static T Fetch<T>(IExceptionContext ectx, IRow row, string name)
{
if (!row.Schema.TryGetColumnIndex(name, out int col))
throw ectx.Except($"Could not find column '{name}'");
T val = default;
row.GetGetter<T>(col)(ref val);
return val;
}

internal Result(IExceptionContext ectx, IRow overallResult)
{
double Fetch(string name) => Fetch<double>(ectx, overallResult, name);
double Fetch(string name) => RowCursorUtils.Fetch<double>(ectx, overallResult, name);
L1 = Fetch(RegressionEvaluator.L1);
L2 = Fetch(RegressionEvaluator.L2);
Rms = Fetch(RegressionEvaluator.Rms);
Expand All @@ -230,10 +221,7 @@ internal Result(IExceptionContext ectx, IRow overallResult)
/// <param name="label">The name of the label column.</param>
/// <param name="score">The name of the predicted score column.</param>
/// <returns>The evaluation metrics for these outputs.</returns>
public Result Evaluate(
IDataView data,
string label,
string score)
public Result Evaluate(IDataView data, string label, string score)
{
Host.CheckValue(data, nameof(data));
Host.CheckNonEmpty(label, nameof(label));
Expand Down
Loading

0 comments on commit 59ebda7

Please sign in to comment.