Skip to content

Commit

Permalink
Enhance the function to find an ideal VM OS image
Browse files Browse the repository at this point in the history
  • Loading branch information
cb-github-robot authored Jun 26, 2024
2 parents f0f2a77 + a3f4509 commit 1857883
Show file tree
Hide file tree
Showing 4 changed files with 127 additions and 17 deletions.
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module github.com/cloud-barista/cm-beetle
go 1.21.6

require (
github.com/cloud-barista/cb-store v0.8.0
github.com/cloud-barista/cb-store v0.8.2
github.com/cloud-barista/cb-tumblebug v0.8.12
github.com/cloud-barista/cm-honeybee/agent v0.0.0-20240530070023-ee1c0a77fbf7
github.com/docker/docker v26.1.3+incompatible
Expand All @@ -26,7 +26,7 @@ require (
require (
github.com/KyleBanks/depth v1.2.1 // indirect
github.com/bwmarrin/snowflake v0.3.0 // indirect
github.com/cloud-barista/cb-log v0.8.0 // indirect
github.com/cloud-barista/cb-log v0.8.2 // indirect
github.com/coreos/go-semver v0.3.1 // indirect
github.com/coreos/go-systemd/v22 v22.5.0 // indirect
github.com/docker/go-connections v0.5.0 // indirect
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,12 @@ github.com/chzyer/readline v1.5.0/go.mod h1:x22KAscuvRqlLoK9CsoYsmxoXZMMFVyOl86c
github.com/chzyer/test v0.0.0-20210722231415-061457976a23/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
github.com/cloud-barista/cb-log v0.8.0 h1:ArWCs1EgpoD3ZnBgcC4cAw5ufI/JHmFKfJswlv4whgk=
github.com/cloud-barista/cb-log v0.8.0/go.mod h1:nGgfTFMPwl1MpCO3FBjexUkNdOYA0BNJoyM9Pd0lMms=
github.com/cloud-barista/cb-log v0.8.2 h1:hPCbLj6TW6m9UWlq002sDuGgxKFVp68w4V3k493+MxY=
github.com/cloud-barista/cb-log v0.8.2/go.mod h1:nGgfTFMPwl1MpCO3FBjexUkNdOYA0BNJoyM9Pd0lMms=
github.com/cloud-barista/cb-store v0.8.0 h1:0K47YEf+K3wx18D+m0XirlDbdTz229XxsTXw6WACjRA=
github.com/cloud-barista/cb-store v0.8.0/go.mod h1:6NuA5TdeVRExd59ULXv6LEhm4EE0ODn9L820g4VqApo=
github.com/cloud-barista/cb-store v0.8.2 h1:7excW7SX0Xw9Xxo0xO4HkndjOYonh4z6mFzNqYN3XH4=
github.com/cloud-barista/cb-store v0.8.2/go.mod h1:GBTRuOApzMWSENFSgIPtrbWUKSwP30bn90gWE0ENUD8=
github.com/cloud-barista/cb-tumblebug v0.8.12 h1:uc5aOI9q5XhMq7GQ9s7WyWyLoxtklcqib/gdDaSgumw=
github.com/cloud-barista/cb-tumblebug v0.8.12/go.mod h1:yOwgw7jXqMdSSgC2g0TvgCG1WyXNw6Q7J6JGfbsqEOw=
github.com/cloud-barista/cm-honeybee/agent v0.0.0-20240530070023-ee1c0a77fbf7 h1:4r7YI8FlWfk2H/H+fICupvzEjCP+4cCkqG5CMzvhDdo=
Expand Down
88 changes: 73 additions & 15 deletions pkg/core/recommendation/recommendation.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package recommendation
import (
"encoding/json"
"fmt"
"os"
"regexp"
"strings"

Expand All @@ -14,14 +13,15 @@ import (
"github.com/cloud-barista/cm-beetle/pkg/core/common"
"github.com/go-resty/resty/v2"
"github.com/rs/zerolog/log"
"github.com/spf13/viper"
)

func Recommend(srcInfra []infra.Infra) (cloudmodel.InfraMigrationReq, error) {

// Initialize resty client with basic auth
client := resty.New()
apiUser := os.Getenv("API_USERNAME")
apiPass := os.Getenv("API_PASSWORD")
apiUser := viper.GetString("api.username")
apiPass := viper.GetString("api.password")
client.SetBasicAuth(apiUser, apiPass)

// set endpoint
Expand Down Expand Up @@ -260,7 +260,7 @@ func Recommend(srcInfra []infra.Infra) (cloudmodel.InfraMigrationReq, error) {
log.Debug().Msg("keywords for the VM OS image recommendation: " + keywords)

// Select VM OS image via LevenshteinDistance-based text similarity
delimiters1 := []string{" ", "-", "_", ",", "(", ")", "[", "]"}
delimiters1 := []string{" ", "-", "_", ",", "(", ")", "[", "]", "/"}
delimiters2 := delimiters1
vmOsImageId := FindBestVmOsImage(keywords, delimiters1, resMcisDynamicCheck.ReqCheck[0].Image, delimiters2)

Expand Down Expand Up @@ -301,7 +301,7 @@ func FindBestVmOsImage(keywords string, kwDelimiters []string, vmImages []mcir.T
var highestScore float64

for _, image := range vmImages {
score := calculateSimilarity(keywords, kwDelimiters, image.CspImageName, imgDelimiters)
score := CalculateSimilarity(keywords, kwDelimiters, image.CspImageName, imgDelimiters)
if score > highestScore {
highestScore = score
bestVmOsImageID = image.Id
Expand All @@ -314,27 +314,37 @@ func FindBestVmOsImage(keywords string, kwDelimiters []string, vmImages []mcir.T
return bestVmOsImageID
}

// calculateSimilarity calculates the similarity between two texts based on word similarities
func calculateSimilarity(text1 string, delimiters1 []string, text2 string, delimiters2 []string) float64 {
// CalculateSimilarity calculates the similarity between two texts based on word similarities
func CalculateSimilarity(text1 string, delimiters1 []string, text2 string, delimiters2 []string) float64 {

words1 := splitToArray(text1, delimiters1)
words2 := splitToArray(text2, delimiters2)

log.Trace().Msgf("From text 1: %s", text1)
log.Trace().Msgf("To word array 1: %v", words1)
log.Trace().Msgf("From text 2: %s", text2)
log.Trace().Msgf("To word array 2: %v", words2)

// Calculate the similarity between two texts based on word similarities
totalSimilarity := 0.0
for _, word1 := range words1 {
bestMatch := 0.0
bestMatchWord := ""
for _, word2 := range words2 {
similarity := wordSimilarity(word1, word2)
// similarity := CalculateSimilarityByLevenshteinDistance(word1, word2)
similarity := CalculateSimilarityBySequenceMatcher(word1, word2)
if similarity > bestMatch {
bestMatch = similarity
}
bestMatchWord = word2

totalSimilarity += activateByReLU(bestMatch, 0.3)
}
}
log.Trace().Msgf("Best match for '%s': '%s' (similarity: %.2f)", word1, bestMatchWord, bestMatch)
totalSimilarity += activateByReLU(bestMatch, 0.5)
}

// Normalize by the number of words
return totalSimilarity / float64(len(words1))
return totalSimilarity // / float64(len(words1))
}

func splitToArray(text string, delimiters []string) []string {
Expand All @@ -348,24 +358,41 @@ func splitToArray(text string, delimiters []string) []string {
text = strings.ToLower(text)

// Create a regular expression pattern for the delimiters
pattern := strings.Join(delimiters, "|")
escapedDelimiters := make([]string, len(delimiters))
for i, d := range delimiters {
escapedDelimiters[i] = regexp.QuoteMeta(d)
}
pattern := strings.Join(escapedDelimiters, "|")
re := regexp.MustCompile(pattern)

// Split text by the delimiters
arr := re.Split(text, -1)

return arr
// Remove empty strings resulting from the split
result := []string{}
for _, str := range arr {
if str != "" {
result = append(result, str)
}
}

return result
}

// wordSimilarity calculates the similarity between two words based on Levenshtein distance
func wordSimilarity(word1, word2 string) float64 {
// CalculateSimilarityByLevenshteinDistance returns a similarity ratio for two
// words derived from their Levenshtein (edit) distance.
//
// The ratio is 1 - distance/longest, where longest is the byte length of the
// longer word. Two empty words yield 1.0.
func CalculateSimilarityByLevenshteinDistance(word1, word2 string) float64 {
	longest := max(len(word1), len(word2))
	if longest == 0 {
		// Both words are empty: treat them as identical and avoid dividing by zero.
		return 1.0
	}

	distance := LevenshteinDistance(word1, word2)
	return 1.0 - float64(distance)/float64(longest)
}

// CalculateSimilarityBySequenceMatcher calculates the similarity between two words
// using SequenceMatcher, i.e. a ratio derived from their longest common substring
// (not from the Levenshtein distance). It returns SequenceMatcher's result unchanged.
func CalculateSimilarityBySequenceMatcher(word1, word2 string) float64 {
return SequenceMatcher(word1, word2)
}

// activateByReLU applies a ReLU function that activates if the similarity is greater than a threshold
func activateByReLU(similarity, threshold float64) float64 {
if similarity > threshold {
Expand Down Expand Up @@ -421,6 +448,37 @@ func min(a, b int) int {
return b
}

// longestCommonSubstring returns the longest contiguous run of bytes that
// appears in both s1 and s2. When several runs share the maximum length, the
// one found first while scanning s1 from left to right is returned. The result
// is empty when the strings have nothing in common.
func longestCommonSubstring(s1, s2 string) string {
	rows, cols := len(s1)+1, len(s2)+1

	// table[i][j] is the length of the common suffix of s1[:i] and s2[:j].
	table := make([][]int, rows)
	for r := range table {
		table[r] = make([]int, cols)
	}

	bestLen, bestEnd := 0, len(s1)
	for i := 1; i < rows; i++ {
		for j := 1; j < cols; j++ {
			if s1[i-1] != s2[j-1] {
				continue
			}
			run := table[i-1][j-1] + 1
			table[i][j] = run
			// Strictly greater keeps the earliest run of maximal length.
			if run > bestLen {
				bestLen, bestEnd = run, i
			}
		}
	}

	return s1[bestEnd-bestLen : bestEnd]
}

// SequenceMatcher calculates the similarity ratio between two strings.
//
// The ratio is 2*M/T, where M is the byte length of the longest common
// substring of text1 and text2 and T is the combined byte length of both
// inputs. The result lies in [0, 1]: 0 means the strings share nothing and
// 1 means they are identical. Two empty strings are considered identical and
// yield 1.0, matching CalculateSimilarityByLevenshteinDistance.
func SequenceMatcher(text1, text2 string) float64 {
	total := len(text1) + len(text2)
	if total == 0 {
		// Without this guard the division below evaluates 0/0, which is NaN
		// for floating-point operands and silently poisons any score it is
		// added to or compared with.
		return 1.0
	}

	lcs := longestCommonSubstring(text1, text2)
	return 2.0 * float64(len(lcs)) / float64(total)
}

// // JaccardSimilarity calculates the Jaccard similarity between two strings
// func JaccardSimilarity(text1, delimiter1, text2, delimiter2 string) float64 {

Expand Down
48 changes: 48 additions & 0 deletions pkg/example/levenshtein-distance/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package main

import (
"fmt"

"github.com/cloud-barista/cm-beetle/pkg/core/recommendation"
)

// main is a small demonstration program. It first prints the word-level
// similarity ratios produced by the Levenshtein-based and the
// SequenceMatcher-based measures for a fixed set of word pairs, and then
// prints the text similarity score of a keyword string against sample VM
// image names.
func main() {
	const separator = "--------------------------------------------------------"

	// Word pairs to compare with both word-level similarity measures.
	wordPairs := []struct {
		str1 string
		str2 string
	}{
		{"22.04", "22.04.1"},
		{"22.04", "20.04"},
		{"20.04", "18.04"},
		{"x86_64", "amd64"},
		{"hvm-ssd", "ssd"},
		{"hvm-ssd", "hdd"},
	}

	for _, pair := range wordPairs {
		first, second := pair.str1, pair.str2
		fmt.Printf("Comparing '%s' with '%s':\n", first, second)
		fmt.Printf(" - LevenshteinDistance, Similarity ratio: %.2f\n", recommendation.CalculateSimilarityByLevenshteinDistance(first, second))
		fmt.Printf(" - SequenceMatcher, Similarity ratio: %.2f\n", recommendation.CalculateSimilarityBySequenceMatcher(first, second))
		fmt.Println(separator)
	}

	// Keywords describing the source OS and the candidate image names to score.
	keywords := "Ubuntu 22.04.4 LTS (Jammy Jellyfish) x86_64 SSD"
	candidates := []string{
		"ubuntu/images/hvm-ssd/ubuntu-jammy-22.04-amd64-server-20220609",
		"ubuntu/images/hvm-ssd/ubuntu-bionic-18.04-amd64-server-20191002",
	}

	// The same delimiter set tokenizes both the keywords and the image names.
	delimiters := []string{" ", "-", "_", ",", "(", ")", "[", "]", "/"}

	for _, candidate := range candidates {
		fmt.Printf("Comparing keywords with VM Image:\n")
		fmt.Printf("Keywords: '%s'\n", keywords)
		fmt.Printf("VM Image: '%s'\n", candidate)
		// Score is computed after the headers so any logging from the call
		// appears between the headers and the score line.
		similarity := recommendation.CalculateSimilarity(keywords, delimiters, candidate, delimiters)
		fmt.Printf(" - Similarity Score: %.2f\n", similarity)
		fmt.Println(separator)
	}
}

0 comments on commit 1857883

Please sign in to comment.