length', 'Sepal width', 'Petal length', 'Petal width']
def species = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']

// Locate the CSV on the classpath. URL.getFile() hands back the path still
// percent-encoded (a space stays "%20"), so convert through a URI to obtain a
// path the filesystem actually understands, and fail clearly if it is missing.
def irisCsvUrl = getClass().classLoader.getResource('iris_data.csv')
if (irisCsvUrl == null) {
    throw new FileNotFoundException('iris_data.csv not found on the classpath')
}
def file = new File(irisCsvUrl.toURI()).path
Table table = Table.read().csv(file)
def helper = new TablesawUtil(file)
…
// Print mean/min/max of every feature column, grouped by 'Class'.
features.each { feature ->
    println table.summarize(feature, mean, min, max).by('Class')
}

// Convert to a Smile data frame: every column except 'Class' becomes the
// feature matrix, and each class name is encoded as its index in `species`.
def dataFrame = table.smile().toDataFrame()
def featureCols = dataFrame.drop('Class').toArray()
def classNames = dataFrame.column('Class').toStringArray()
int[] classes = classNames.collect { species.indexOf(it) }
…
…
// Single source for the third argument of KNN.fit, shared by the model fitted
// here and the one built inside the cross validation below.
def neighborCount = 3

// Fit on the full data set and predict those same rows.
def knn = KNN.fit(featureCols, classes, neighborCount)
def predictions = knn.predict(featureCols)
println """ Confusion matrix: ${ConfusionMatrix.of(classes, predictions)} """

// Add a 'Result' column: the predicted species when it matches the actual
// one, otherwise "predicted/actual".
table = table.addColumns(StringColumn.create('Result',
    predictions.indexed().collect { idx, predictedClass ->
        def actual = classNames[idx]
        def predicted = species[predictedClass]
        actual == predicted ? predicted : "$predicted/$actual".toString()
    }))

def title = 'Petal width vs length with predicted[/actual] class'
helper.show(ScatterPlot.create(title, table, 'Petal width', 'Petal length', 'Result'), 'KNNClassification')

// use cross validation to get accuracy
CrossValidation.classification(10, featureCols, classes, (x, y) -> KNN.fit(x, y, neighborCount)).with {
    printf 'Accuracy: %.2f%% +/- %.2f\n', 100 * avg.accuracy, 100 * sd.accuracy
}