// -----------------------------------------------------------------------------
// Reconstructed from a git patch whose line breaks and angle-bracketed text
// (generic type arguments, XML tags) were lost during extraction.
//
//   Commit : cde4fca02029cf3552cb63c050d3002bab68828f
//   Author : Serghei Cebotari
//   Date   : Wed, 20 Sep 2023 13:46:55 +0300
//   Subject: Solution organize
//
// The commit created three files (71 insertions) under RhSolutions.ML.Lib/:
//   Product.cs                 (16 lines)
//   RhSolutions.ML.Lib.csproj  (13 lines)
//   RhSolutionsMLBuilder.cs    (42 lines)
//
// NOTE(review): of RhSolutions.ML.Lib.csproj only the values "net7.0",
// "enable", "enable" survived. Presumably these are TargetFramework,
// ImplicitUsings and Nullable (the code below uses `string?` and relied on
// implicit System usings). The remaining project lines, presumably the
// package references, could not be recovered and are not reproduced here.
// -----------------------------------------------------------------------------

using System;
using System.IO;
using Microsoft.ML;
using Microsoft.ML.Data;

// ---- RhSolutions.ML.Lib/Product.cs ------------------------------------------
namespace RhSolutions.ML
{
    /// <summary>
    /// Input row for the text loader: a product name and its type.
    /// </summary>
    public class Product
    {
        /// <summary>Product name, loaded from column 0 of the input file.</summary>
        [LoadColumn(0)]
        public string? Name { get; set; }

        /// <summary>Product type, loaded from column 1 of the input file.</summary>
        [LoadColumn(1)]
        public string? Type { get; set; }
    }

    /// <summary>
    /// Prediction output row.
    /// </summary>
    public class TypePrediction
    {
        /// <summary>Predicted type, bound to the "PredictedLabel" column.</summary>
        [ColumnName("PredictedLabel")]
        public string? Type { get; set; }
    }
}

// ---- RhSolutions.ML.Lib/RhSolutionsMLBuilder.cs -----------------------------
namespace RhSolutions.ML.Lib
{
    /// <summary>
    /// Trains a multiclass classifier that maps <see cref="Product.Name"/> to
    /// <see cref="Product.Type"/> and saves it as <c>Models/model.zip</c>.
    /// </summary>
    public class RhSolutionsMLBuilder
    {
        // AppContext.BaseDirectory is used instead of the directory of
        // Environment.GetCommandLineArgs()[0]: when args[0] carries no directory
        // part, Path.GetDirectoryName returns "" (not null), so the old `?? "."`
        // fallback never fired and paths became relative to the current directory.
        private static readonly string _appPath = AppContext.BaseDirectory;

        // Four levels above the application directory.
        // NOTE(review): presumably the solution root when running from
        // <project>/bin/<configuration>/<tfm> - confirm against the repo layout.
        private static readonly string _rootPath =
            Path.GetFullPath(Path.Combine(_appPath, "..", "..", "..", ".."));

        // Fixed seed, shared by every step of the pipeline.
        private static readonly MLContext _mlContext = new MLContext(seed: 0);

        /// <summary>
        /// Loads every file in the <c>Data</c> folder, trains the model and
        /// writes it to <c>Models/model.zip</c>.
        /// </summary>
        public static void RebuildModel()
        {
            // "*" makes the loader pick up all files in the Data folder.
            // The row type argument was restored from context: Product is the
            // only class here carrying [LoadColumn] attributes.
            IDataView trainingData = _mlContext.Data.LoadFromTextFile<Product>(
                Path.Combine(_rootPath, "Data", "*"), hasHeader: false);

            IEstimator<ITransformer> pipeline = ProcessData();
            BuildAndTrainModel(trainingData, pipeline, out ITransformer trainedModel);
            SaveModelAsFile(_mlContext, trainingData.Schema, trainedModel);
        }

        /// <summary>
        /// Builds the data-preparation part of the pipeline.
        /// </summary>
        /// <returns>
        /// An estimator that maps "Type" to the key column "Label", featurizes
        /// "Name" into "NameFeaturized", exposes it as "Features", and ends
        /// with a cache checkpoint.
        /// </returns>
        private static IEstimator<ITransformer> ProcessData()
        {
            return _mlContext.Transforms.Conversion
                .MapValueToKey(inputColumnName: "Type", outputColumnName: "Label")
                .Append(_mlContext.Transforms.Text.FeaturizeText(
                    inputColumnName: "Name", outputColumnName: "NameFeaturized"))
                .Append(_mlContext.Transforms.Concatenate("Features", "NameFeaturized"))
                .AppendCacheCheckpoint(_mlContext);
        }

        /// <summary>
        /// Appends the trainer to <paramref name="pipeline"/> and fits it.
        /// </summary>
        /// <param name="trainingDataView">Data to fit the pipeline on.</param>
        /// <param name="pipeline">Data-preparation estimator to extend.</param>
        /// <param name="trainedModel">Receives the fitted transformer.</param>
        /// <returns>The complete (unfitted) training pipeline.</returns>
        private static IEstimator<ITransformer> BuildAndTrainModel(
            IDataView trainingDataView,
            IEstimator<ITransformer> pipeline,
            out ITransformer trainedModel)
        {
            var trainer = _mlContext.MulticlassClassification.Trainers
                .SdcaMaximumEntropy("Label", "Features");

            // Converts the predicted key back to its original value.
            var keyToValue = _mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel");

            var trainingPipeline = pipeline.Append(trainer).Append(keyToValue);

            trainedModel = trainingPipeline.Fit(trainingDataView);
            return trainingPipeline;
        }

        /// <summary>
        /// Saves <paramref name="model"/> to <c>Models/model.zip</c>.
        /// </summary>
        /// <param name="mlContext">Context used to save the model.</param>
        /// <param name="trainingDataViewSchema">Input schema stored with the model.</param>
        /// <param name="model">Fitted transformer to persist.</param>
        private static void SaveModelAsFile(
            MLContext mlContext,
            DataViewSchema trainingDataViewSchema,
            ITransformer model)
        {
            string modelsFolder = Path.Combine(_rootPath, "Models");

            // Make sure the target folder exists so a first run on a fresh
            // checkout does not fail; this is a no-op when it already exists.
            Directory.CreateDirectory(modelsFolder);

            mlContext.Model.Save(model, trainingDataViewSchema,
                Path.Combine(modelsFolder, "model.zip"));
        }
    }
}