Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Code changes for Manhattan, Euclidean and Minkowski distance calculation #35

Merged
merged 7 commits into from
Oct 28, 2017
30 changes: 0 additions & 30 deletions chebyshev_distance.go

This file was deleted.

15 changes: 0 additions & 15 deletions chebyshev_distance_test.go

This file was deleted.

94 changes: 94 additions & 0 deletions data_set_distances.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
package stats

import (
"math"
)

// validateData checks that two data sets can be compared element by element.
// It returns EmptyInput when either slice has no elements, SizeErr when the
// slices have different lengths, and nil otherwise.
func validateData(dataPointX, dataPointY []float64) error {
	nx, ny := len(dataPointX), len(dataPointY)
	switch {
	case nx == 0, ny == 0:
		return EmptyInput
	case nx != ny:
		return SizeErr
	default:
		return nil
	}
}

// ChebyshevDistance returns the Chebyshev distance between two data sets,
// i.e. the largest absolute difference between elements at the same index.
//
// If validateData rejects the inputs, the result is NaN together with the
// validation error.
func ChebyshevDistance(dataPointX, dataPointY []float64) (distance float64, err error) {
	if err = validateData(dataPointX, dataPointY); err != nil {
		return math.NaN(), err
	}
	for i, y := range dataPointY {
		// Keep the running maximum; a NaN difference never compares greater
		// and is therefore skipped.
		if gap := math.Abs(dataPointX[i] - y); gap > distance {
			distance = gap
		}
	}
	return distance, nil
}

// EuclideanDistance returns the Euclidean distance between two data sets:
// the square root of the sum of squared element-wise differences.
//
// If validateData rejects the inputs, the result is NaN together with the
// validation error.
func EuclideanDistance(dataPointX, dataPointY []float64) (distance float64, err error) {
	if err = validateData(dataPointX, dataPointY); err != nil {
		return math.NaN(), err
	}
	var sumSquares float64
	for i := range dataPointX {
		delta := dataPointX[i] - dataPointY[i]
		sumSquares += delta * delta
	}
	return math.Sqrt(sumSquares), nil
}

// ManhattanDistance returns the Manhattan distance between two data sets:
// the sum of the absolute element-wise differences.
//
// If validateData rejects the inputs, the result is NaN together with the
// validation error.
func ManhattanDistance(dataPointX, dataPointY []float64) (distance float64, err error) {
	if err = validateData(dataPointX, dataPointY); err != nil {
		return math.NaN(), err
	}
	for i, x := range dataPointX {
		distance += math.Abs(x - dataPointY[i])
	}
	return distance, nil
}

// MinkowskiDistance returns the Minkowski distance of order lambda between
// two data sets: (sum_i |x_i - y_i|^lambda)^(1/lambda).
//
// Input:
//
//	dataPointX: first set of data points.
//	dataPointY: second set of data points; both sets must be non-empty and
//	            of equal length.
//	lambda:     the order of the distance (also called p). lambda = 1 gives
//	            the Manhattan distance, lambda = 2 the Euclidean distance,
//	            and as lambda grows towards infinity the result approaches
//	            the Chebyshev distance. lambda is expected to be positive;
//	            non-positive and NaN values are not validated here.
//
// Output:
//
//	The distance, or NaN and an error: the validateData error for unusable
//	inputs, or InfValue when the distance itself is +Inf.
func MinkowskiDistance(dataPointX, dataPointY []float64, lambda float64) (distance float64, err error) {
	if err = validateData(dataPointX, dataPointY); err != nil {
		return math.NaN(), err
	}

	var sum, largest float64
	for i := range dataPointX {
		diff := math.Abs(dataPointX[i] - dataPointY[i])
		sum += math.Pow(diff, lambda)
		if diff > largest {
			largest = diff
		}
	}

	if lambda > 0 && (sum == 0 || math.IsInf(sum, 1)) {
		// The sum of powers left the float64 range (overflow to +Inf for
		// large differences, underflow to 0 for small ones) even though the
		// distance itself may be representable. Recompute with every
		// difference divided by the largest one, which keeps each term in
		// [0, 1], and scale the root back afterwards. This branch is only
		// taken when the direct formula has already broken down, so ordinary
		// inputs keep their exact previous results.
		if largest == 0 {
			// Every difference is zero: the data sets are identical.
			return 0, nil
		}
		if math.IsInf(largest, 1) {
			return math.NaN(), InfValue
		}
		var scaled float64
		for i := range dataPointX {
			scaled += math.Pow(math.Abs(dataPointX[i]-dataPointY[i])/largest, lambda)
		}
		distance = largest * math.Pow(scaled, 1/lambda)
	} else {
		distance = math.Pow(sum, 1/lambda)
	}

	if math.IsInf(distance, 1) {
		return math.NaN(), InfValue
	}
	return distance, nil
}
49 changes: 49 additions & 0 deletions data_set_distances_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package stats

import (
"testing"
)

// distanceFunctionType is the shared signature of the two-argument distance
// functions exercised by TestDataSetDistances.
type distanceFunctionType func([]float64, []float64) (float64, error)

// minkowskiDistanceTestMatrix holds the inputs, the order lambda and the
// expected result for MinkowskiDistance.
var minkowskiDistanceTestMatrix = []struct {
	dataPointX []float64
	dataPointY []float64
	lambda     float64
	distance   float64
}{
	{[]float64{2, 3, 4, 5, 6, 7, 8}, []float64{8, 7, 6, 5, 4, 3, 2}, 1, 24},
	{[]float64{2, 3, 4, 5, 6, 7, 8}, []float64{8, 7, 6, 5, 4, 3, 2}, 2, 10.583005244258363},
	// For a finite order the result only approaches the Chebyshev distance
	// (6): the two coordinates that differ by 6 dominate the sum, giving
	// 6 * 2^(1/99).
	{[]float64{2, 3, 4, 5, 6, 7, 8}, []float64{8, 7, 6, 5, 4, 3, 2}, 99, 6.0421563263},
}

// distanceTestMatrix holds the inputs and the expected result for each
// distance function that takes only the two data sets.
var distanceTestMatrix = []struct {
	dataPointX       []float64
	dataPointY       []float64
	distance         float64
	distanceFunction distanceFunctionType
}{
	{[]float64{2, 3, 4, 5, 6, 7, 8}, []float64{8, 7, 6, 5, 4, 3, 2}, 6, ChebyshevDistance},
	{[]float64{2, 3, 4, 5, 6, 7, 8}, []float64{8, 7, 6, 5, 4, 3, 2}, 24, ManhattanDistance},
	{[]float64{2, 3, 4, 5, 6, 7, 8}, []float64{8, 7, 6, 5, 4, 3, 2}, 10.583005244258363, EuclideanDistance},
}

// distancesMatch reports whether got is within a small absolute tolerance of
// want. A NaN on either side never matches.
func distancesMatch(got, want float64) bool {
	const tolerance = 1e-6
	diff := got - want
	if diff < 0 {
		diff = -diff
	}
	return diff <= tolerance
}

// TestDataSetDistances checks every distance function against its table of
// expected values. A case fails when the call returns an error OR when the
// value is off; either condition alone is a failure.
func TestDataSetDistances(t *testing.T) {

	// Test Minkowski Distance with different lambda values.
	for _, testData := range minkowskiDistanceTestMatrix {
		distance, err := MinkowskiDistance(testData.dataPointX, testData.dataPointY, testData.lambda)
		if err != nil || !distancesMatch(distance, testData.distance) {
			t.Logf("lambda=%v: got %v (err: %v), want %v", testData.lambda, distance, err, testData.distance)
			t.Errorf("Failed to compute Minkowski distance.")
		}
	}

	// Compute distance with the help of all
	// algorithms.
	for i, testSet := range distanceTestMatrix {
		distance, err := testSet.distanceFunction(testSet.dataPointX, testSet.dataPointY)
		if err != nil || !distancesMatch(distance, testSet.distance) {
			t.Logf("case %d: got %v (err: %v), want %v", i, distance, err, testSet.distance)
			t.Errorf("Failed to compute distance.")
		}
	}
}
1 change: 1 addition & 0 deletions errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,5 @@ var (
ZeroErr = statsErr{"Slice must not contain zero values."}
BoundsErr = statsErr{"Input is outside of range."}
SizeErr = statsErr{"Slices must be the same length."}
InfValue = statsErr{"Value is infinite."}
)
18 changes: 16 additions & 2 deletions examples/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,21 @@ func main() {
a, _ = stats.Round(2.18978102189781, 3)
fmt.Println(a) // 2.189

d, _ = stats.ComputeChebyshevDistance([]float64{2, 4, 4, 4, 5, 5, 7, 9}, []float64{2, 4, 4, 4, 5, 5, 7, 1})
fmt.Println(d) // Should yield 8
d, _ = stats.ChebyshevDistance([]float64{2, 3, 4, 5, 6, 7, 8}, []float64{8, 7, 6, 5, 4, 3, 2})
fmt.Println(d) // Should yield 6

d, _ = stats.ManhattanDistance([]float64{2, 3, 4, 5, 6, 7, 8}, []float64{8, 7, 6, 5, 4, 3, 2})
fmt.Println(d) // Should yield 24

d, _ = stats.EuclideanDistance([]float64{2, 3, 4, 5, 6, 7, 8}, []float64{8, 7, 6, 5, 4, 3, 2})
fmt.Println(d) // Should yield 10.583005244258363

d, _ = stats.MinkowskiDistance([]float64{2, 3, 4, 5, 6, 7, 8}, []float64{8, 7, 6, 5, 4, 3, 2}, float64(1))
fmt.Println(d) // Should yield 24

d, _ = stats.MinkowskiDistance([]float64{2, 3, 4, 5, 6, 7, 8}, []float64{8, 7, 6, 5, 4, 3, 2}, float64(2))
fmt.Println(d) // Should yield 10.583005244258363

d, _ = stats.MinkowskiDistance([]float64{2, 3, 4, 5, 6, 7, 8}, []float64{8, 7, 6, 5, 4, 3, 2}, float64(99))
fmt.Println(d) // Should yield 6
}