Skip to content

Commit

Permalink
Merge pull request #48 from A2-ai/feature/hash-cache
Browse files Browse the repository at this point in the history
Add local file hash caching
  • Loading branch information
andriygm authored Aug 22, 2023
2 parents e4a79b5 + 2263335 commit ce914ff
Show file tree
Hide file tree
Showing 9 changed files with 169 additions and 8 deletions.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module dvs
go 1.20

require (
github.com/adrg/xdg v0.4.0
github.com/dustin/go-humanize v1.0.1
github.com/fatih/color v1.15.0
github.com/schollz/progressbar/v3 v3.13.1
Expand Down
7 changes: 6 additions & 1 deletion go.sum
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
github.com/adrg/xdg v0.4.0 h1:RzRqFcjH4nE5C6oTAxhBtoE2IRyjBSa62SCbyPidvls=
github.com/adrg/xdg v0.4.0/go.mod h1:N6ag73EX4wyxeaoeHctc1mas01KZgsj5tYiAIwqJE/E=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
Expand Down Expand Up @@ -42,8 +44,9 @@ github.com/spf13/cobra v1.7.0/go.mod h1:uLxZILRyS/50WlhOIKD7W6V5bgeIt+4sICxh6uRM
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/zeebo/assert v1.1.0 h1:hU1L1vLTHsnO8x8c9KAR5GmM5QscxHg5RNU5z5qbUWY=
github.com/zeebo/assert v1.1.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0=
github.com/zeebo/blake3 v0.2.3 h1:TFoLXsjeXqRNFxSbk35Dk4YtszE/MQQGK10BH4ptoTg=
Expand All @@ -52,6 +55,7 @@ github.com/zeebo/pcg v1.0.1 h1:lyqfGeWiv4ahac6ttHs+I5hwtH/+1mrhlCtVNQM2kHo=
github.com/zeebo/pcg v1.0.1/go.mod h1:09F0S9iiKrwn9rlI5yjLkmrug154/YRW6KnnXVDM/l4=
golang.org/x/exp v0.0.0-20230801115018-d63ba01acd4b h1:r+vk0EmXNmekl0S0BascoeeoHk/L7wmaW2QF90K+kYI=
golang.org/x/exp v0.0.0-20230801115018-d63ba01acd4b/go.mod h1:FXUEEKJgO7OQYeo8N01OfiKP8RXMtf6e8aTskBGqWdc=
golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM=
Expand All @@ -62,5 +66,6 @@ golang.org/x/term v0.11.0/go.mod h1:zC9APTIj3jG3FdV/Ons+XE1riIZXG4aZ4GTHiPZJPIU=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
104 changes: 104 additions & 0 deletions internal/file/cache.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
package file

import (
"encoding/gob"
"errors"
"os"
"path/filepath"
"time"

"github.com/adrg/xdg"
)

// CacheData is the record that is gob-encoded into a cache entry for one file.
type CacheData struct {
// Hash is the file hash stored in the cache entry.
Hash string
// CreationTime is declared but is neither populated nor checked by the code
// in this file; the lines that would use it are commented out.
CreationTime time.Time
// ModificationTime is the file's modification time recorded when the hash
// was cached. GetCachedHash compares it against the file's current
// modification time to decide whether the entry is still valid.
ModificationTime time.Time
}

// GetCachedHash returns the cached hash of the file at path.
//
// The cache entry is stored at <xdg.CacheHome>/dvs/<absolute path of the file>
// and holds the hash together with the file's modification time at the moment
// the hash was cached. The entry is trusted only while that modification time
// still equals the modification time of the file on disk.
//
// A non-nil error means there is no usable cache entry: the entry is missing
// or unreadable, the file cannot be stat'ed, or the entry is stale. A stale
// entry is deleted before the error is returned. Callers are expected to fall
// back to hashing the file themselves.
func GetCachedHash(path string) (string, error) {
	// The cache is keyed by absolute path so that the same file resolves to
	// the same entry regardless of the working directory.
	absPath, err := filepath.Abs(path)
	if err != nil {
		return "", err
	}

	cachePath := filepath.Join(xdg.CacheHome, "dvs", absPath)
	cacheFile, err := os.Open(cachePath)
	if err != nil {
		return "", err
	}

	// Decode and close immediately: the entry may have to be removed below,
	// and some platforms refuse to delete a file that is still open. The
	// close error of a read-only handle carries no information we can act on.
	var cache CacheData
	decodeErr := gob.NewDecoder(cacheFile).Decode(&cache)
	_ = cacheFile.Close()
	if decodeErr != nil {
		return "", decodeErr
	}

	fileInfo, err := os.Stat(absPath)
	if err != nil {
		return "", err
	}

	// CreationTime is intentionally not checked; it is never written.
	//
	// Compare instants with Equal rather than ==: == also compares the
	// Location pointer, which does not reliably survive a gob round trip, so
	// a perfectly valid entry could be treated as stale.
	if !cache.ModificationTime.Equal(fileInfo.ModTime()) {
		if err := os.Remove(cachePath); err != nil {
			return "", err
		}
		return "", errors.New("file modification time does not match cache (invalidating)")
	}

	return cache.Hash, nil
}

// WriteHashToCache stores hash as the cached hash of the file at path.
//
// The entry is written to <xdg.CacheHome>/dvs/<absolute path of the file>,
// creating parent directories as needed and replacing any previous entry. The
// file's current modification time is recorded alongside the hash so that
// GetCachedHash can detect later modifications.
//
// NOTE(review): the modification time is read here, after the caller has
// already hashed the file. If the file is modified between hashing and this
// call, the entry pairs the old hash with the new modification time. Closing
// that window would require the caller to pass in the time it observed.
//
// It returns an error if the path cannot be resolved, the file cannot be
// stat'ed, or the cache entry cannot be created, written or closed.
func WriteHashToCache(path string, hash string) error {
	absPath, err := filepath.Abs(path)
	if err != nil {
		return err
	}

	// Stat the source file before touching the cache so that a failure here
	// does not leave an empty cache entry behind.
	fileInfo, err := os.Stat(absPath)
	if err != nil {
		return err
	}

	cachePath := filepath.Join(xdg.CacheHome, "dvs", absPath)
	if err := os.MkdirAll(filepath.Dir(cachePath), 0755); err != nil {
		return err
	}

	// O_TRUNC discards any previous entry so no stale bytes can trail the
	// newly encoded record.
	cacheFile, err := os.OpenFile(cachePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
	if err != nil {
		return err
	}

	encodeErr := gob.NewEncoder(cacheFile).Encode(CacheData{
		Hash:             hash,
		ModificationTime: fileInfo.ModTime(),
	})
	// Close explicitly instead of deferring: for a file that was just written,
	// the close error can be the only report of a failed write.
	closeErr := cacheFile.Close()
	if encodeErr != nil {
		return encodeErr
	}
	return closeErr
}
9 changes: 9 additions & 0 deletions internal/file/cache_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package file

import (
"testing"
)

// TestCache is a placeholder: its body is empty, so it exercises none of the
// cache code and always passes.
func TestCache(t *testing.T) {
// TODO: cover GetCachedHash and WriteHashToCache (hit, miss, and
// invalidation after the file's modification time changes).

}
20 changes: 18 additions & 2 deletions internal/file/hash.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,30 @@ import (
"github.com/zeebo/blake3"
)

func GetFileHash(path string) (string, error) {
func GetFileHash(path string) (hash string, err error) {
// Hit cache if we can first
hash, err = GetCachedHash(path)
if err == nil {
return hash, nil
}

// Read in file bytes
fileContents, err := os.ReadFile(path)
if err != nil {
return "", err
}

// Hash file contents
hash := fmt.Sprintf("%x", blake3.Sum256(fileContents))
hash = fmt.Sprintf("%x", blake3.Sum256(fileContents))
if err != nil {
return "", err
}

// Cache the hash so we don't have to hash the file again
err = WriteHashToCache(path, hash)
if err != nil {
return "", err
}

return hash, nil
}
6 changes: 5 additions & 1 deletion internal/storage/add.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"dvs/internal/git"
"dvs/internal/log"
"dvs/internal/meta"
"dvs/internal/utils"
"os"
"os/user"
"path/filepath"
Expand All @@ -16,13 +17,16 @@ func Add(localPath string, storageDir string, gitDir string, message string, dry
// Get file hash
log.Print(" Getting hash...")

startTime := time.Now()
fileHash, err := file.GetFileHash(localPath)
endTime := time.Now()

if err != nil {
return fileHash, err
}

log.OverwritePreviousLine()
log.Print(" Getting hash...", log.ColorGreen("✔"))
log.Print(" Getting hash...", log.ColorGreen("✔ in ", utils.FormatDuration(endTime.Sub(startTime))))
log.JsonLogger.Actions = append(log.JsonLogger.Actions, log.JsonAction{
Action: "got hash",
Path: localPath,
Expand Down
10 changes: 7 additions & 3 deletions internal/storage/get.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@ import (
"dvs/internal/file"
"dvs/internal/log"
"dvs/internal/meta"
"dvs/internal/utils"
"errors"
"os"
"path/filepath"
"time"
)

// Gets a file from storage
Expand All @@ -27,15 +29,17 @@ func Get(localPath string, storageDir string, gitDir string, dry bool) error {
var localHash string
if err == nil {
// Get local file's hash
log.Print(" Calculating local hash...")
log.Print(" Getting local hash...")

startTime := time.Now()
localHash, err = file.GetFileHash(localPath)
endTime := time.Now()

log.OverwritePreviousLine()
if err != nil {
log.Print(" Calculating local hash...", log.ColorBold(log.ColorYellow("!")))
log.Print(" Getting local hash...", log.ColorBold(log.ColorYellow("!")))
} else {
log.Print(" Calculating local hash...", log.ColorGreen("✔"))
log.Print(" Getting local hash...", log.ColorGreen("✔ in ", utils.FormatDuration(endTime.Sub(startTime))))
}
}

Expand Down
6 changes: 5 additions & 1 deletion internal/storage/get_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"os"
"path/filepath"
"testing"
"time"
)

func TestGetNoLongerInStorage(t *testing.T) {
Expand Down Expand Up @@ -104,6 +105,9 @@ func TestGetAgainAfterLocalMod(t *testing.T) {
t.Fatal(err)
}

// Wait a bit to ensure modification time is different
time.Sleep(time.Millisecond)

// Modify the file locally
err = os.WriteFile(filepath.Join(tempDir, "test.txt"), []byte("test2"), 0644)
if err != nil {
Expand All @@ -129,6 +133,6 @@ func TestGetAgainAfterLocalMod(t *testing.T) {
}

if string(data) != "test" {
t.Error("File contents did not match")
t.Error("File contents did not match, should have been test but got", string(data))
}
}
14 changes: 14 additions & 0 deletions internal/utils/duration.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package utils

import "time"

// FormatDuration formats d with roughly three significant digits.
//
// Durations whose magnitude is below 100s are rounded to three significant
// digits (for example 1.234567s becomes "1.23s"); larger durations are rounded
// to the nearest second. Negative durations are formatted by magnitude and
// keep their sign. The result uses the standard time.Duration string form.
func FormatDuration(d time.Duration) string {
	// Pick the scale from the magnitude of d. Comparing against d directly
	// never terminates for negative durations, because scale decays to 0 and
	// 0 > d stays true forever.
	magnitude := d
	if magnitude < 0 {
		magnitude = -magnitude
	}

	// Find the largest power-of-ten scale (at most 100s) that does not exceed
	// the magnitude. The scale > 0 guard also bounds the loop for the one
	// value whose negation overflows (the minimum Duration).
	scale := 100 * time.Second
	for scale > magnitude && scale > 0 {
		scale /= 10
	}

	// Rounding to scale/100 keeps three significant digits; a non-positive
	// rounding unit makes Round return d unchanged.
	return d.Round(scale / 100).String()
}

0 comments on commit ce914ff

Please sign in to comment.