Skip to content

Commit

Permalink
find offset in pack file index
Browse files Browse the repository at this point in the history
  • Loading branch information
richardjennings committed Oct 23, 2024
1 parent 49be8b0 commit 77d296f
Show file tree
Hide file tree
Showing 4 changed files with 194 additions and 0 deletions.
7 changes: 7 additions & 0 deletions config.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ const (
DefaultBranchName = "main"
DefaultEditor = "vim"
DefaultPackedRefsFile = "info/refs"
DefaultPackfileDirectory = "pack"
)

var config Cnf
Expand All @@ -36,6 +37,7 @@ type (
RefsDirectory string
RefsHeadsDirectory string
PackedRefsFile string
PackfileDirectory string
DefaultBranch string
GitIgnore []string
Editor string
Expand Down Expand Up @@ -72,6 +74,7 @@ func Configure(opts ...Opt) error {
RefsDirectory: DefaultRefsDirectory,
RefsHeadsDirectory: DefaultRefsHeadsDirectory,
PackedRefsFile: DefaultPackedRefsFile,
PackfileDirectory: DefaultPackfileDirectory,
DefaultBranch: DefaultBranchName,
Editor: DefaultEditor,
GitIgnore: []string{ //@todo read from .gitignore
Expand Down Expand Up @@ -130,6 +133,10 @@ func PackedRefsFile() string {
return filepath.Join(config.Path, config.GitDirectory, config.PackedRefsFile)
}

// ObjectPackfileDirectory returns the pack file directory path, built by
// joining the configured Path, GitDirectory, ObjectsDirectory and
// PackfileDirectory values.
func ObjectPackfileDirectory() string {
	segments := []string{
		config.Path,
		config.GitDirectory,
		config.ObjectsDirectory,
		config.PackfileDirectory,
	}
	return filepath.Join(segments...)
}

// GitHeadPath returns the path of the head file, built by joining the
// configured Path, GitDirectory and HeadFile values.
func GitHeadPath() string {
	segments := []string{config.Path, config.GitDirectory, config.HeadFile}
	return filepath.Join(segments...)
}
Expand Down
8 changes: 8 additions & 0 deletions fs.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,14 @@ func NewSha(b []byte) (Sha, error) {
return Sha{}, fmt.Errorf("invalid sha %s", b)
}

// ShaFromHexString decodes the hexadecimal string s into raw bytes and hands
// them to NewSha. A decoding failure is returned together with a zero Sha.
func ShaFromHexString(s string) (Sha, error) {
	raw, err := hex.DecodeString(s)
	if err == nil {
		return NewSha(raw)
	}
	return Sha{}, err
}

// String returns the same value as AsHexString, which lets a Sha be used
// wherever a fmt.Stringer is expected.
func (s Sha) String() string {
	text := s.AsHexString()
	return text
}
Expand Down
166 changes: 166 additions & 0 deletions packfile.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
package g

import (
"encoding/binary"
"errors"
"fmt"
"io"
"os"
"path/filepath"
)

// Lookup searches the pack file indexes found under ObjectPackfileDirectory
// for sha and, on the first index that contains it, resolves the object from
// the pack file that sits next to that index.
//
// It returns (nil, nil) when no index contains sha, which includes the case
// where the pack directory does not exist. Any other failure while listing or
// reading the indexes is returned as an error.
func Lookup(sha Sha) (*Object, error) {
	const (
		idxExt  = ".idx"
		packExt = ".pack"
	)
	packDir := ObjectPackfileDirectory()

	// find the available pack file indexes
	var idxPaths []string
	walkErr := filepath.Walk(packDir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			// info is nil whenever err is set, so it must not be touched.
			if path == packDir && errors.Is(err, os.ErrNotExist) {
				// no pack directory simply means there are no packs.
				return nil
			}
			return err
		}
		if info.IsDir() {
			return nil
		}
		if filepath.Ext(path) == idxExt {
			// keep the full path so that indexes are opened exactly where
			// they were found, whatever their name looks like.
			idxPaths = append(idxPaths, path)
		}
		return nil
	})
	if walkErr != nil {
		return nil, fmt.Errorf("listing pack indexes in %q: %w", packDir, walkErr)
	}

	// check each pack file index for the sha
	for _, idxPath := range idxPaths {
		offset, found, err := findOffsetInIdx(sha, idxPath)
		if err != nil {
			return nil, fmt.Errorf("searching pack index %q: %w", idxPath, err)
		}
		if !found {
			continue
		}
		// the pack shares the index file name, only the extension differs.
		packPath := idxPath[:len(idxPath)-len(idxExt)] + packExt
		return findObjectInPack(offset, packPath)
	}
	return nil, nil
}

// readMagic consumes the next four bytes of fh and verifies that they are the
// pack index magic number (0xff followed by "tOc").
//
// A read failure is returned unchanged; any other byte sequence yields an
// "invalid packfile index magic bytes" error.
func readMagic(fh *os.File) error {
	want := [4]byte{0xff, 't', 'O', 'c'}

	var got [4]byte
	if err := binary.Read(fh, binary.BigEndian, got[:]); err != nil {
		return err
	}
	if got == want {
		return nil
	}
	return errors.New("invalid packfile index magic bytes")
}

// readIdxFormat consumes the next four bytes of fh and returns them decoded as
// a big-endian uint32, which is the version number of the pack index.
// On a read failure it returns 0 and the error.
func readIdxFormat(fh *os.File) (uint32, error) {
	var raw [4]byte
	if err := binary.Read(fh, binary.BigEndian, raw[:]); err != nil {
		return 0, err
	}
	return binary.BigEndian.Uint32(raw[:]), nil
}

// readFanout consumes the 256-entry fanout table from fh, each entry being a
// big-endian uint32.
//
// The fanout is an array of jump offsets keyed by the first byte of a sha; it
// lets a search start close to the wanted name instead of at the beginning.
// On a read failure the returned table is all zeroes.
func readFanout(fh *os.File) ([256]uint32, error) {
	var table [256]uint32

	raw := make([]byte, 4*len(table))
	if err := binary.Read(fh, binary.BigEndian, raw); err != nil {
		return table, err
	}
	for i := range table {
		table[i] = binary.BigEndian.Uint32(raw[4*i:])
	}
	return table, nil
}

// findObjectName reads up to items consecutive 20-byte object names from the
// current position of fh and compares each one with sha.
//
// It returns the zero-based position of the match, relative to where reading
// started, and true when sha is found. When nothing matches it returns
// (0, false, nil). On a read or NewSha failure it returns the position that
// was being processed along with the error.
//
// This should be an efficiently implemented binary search; for now it is a
// linear scan comparing hexadecimal strings.
func findObjectName(items uint32, fh *os.File, sha Sha) (uint32, bool, error) {
	var raw [20]byte
	for pos := uint32(0); pos < items; pos++ {
		err := binary.Read(fh, binary.BigEndian, &raw)
		if err != nil {
			return pos, false, err
		}
		candidate, err := NewSha(raw[:])
		if err != nil {
			return pos, false, err
		}
		if candidate.AsHexString() != sha.AsHexString() {
			continue
		}
		return pos, true, nil
	}
	return 0, false, nil
}

// readObjectOffset returns the pack file offset stored for the object at
// position i of a version 2 pack index.
//
// size is the total number of objects in the index and i is the zero-based
// position of the object in the sorted name table. fh is repositioned as a
// side effect.
//
// An error is returned when the seek or read fails, or when the entry has its
// most significant bit set: such entries point into the 64-bit offset table,
// which cannot be represented by the uint32 result.
// @todo lookup in the long offset table instead of failing
func readObjectOffset(size uint32, fh *os.File, i uint32) (uint32, error) {
	const (
		headerLen = 4 + 4   // magic bytes + format version
		fanoutLen = 256 * 4 // fanout table
		nameLen   = 20      // one sorted object name
		crcLen    = 4       // one CRC32 value
		offsetLen = 4       // one 4-byte offset value
		largeFlag = 0x80000000
	)

	// skip the header, the fanout, every sorted object name and every CRC32
	// value, then move to entry i of the offset table. The arithmetic is done
	// in int64 because the uint32 products wrap around for very large indexes.
	pos := int64(headerLen+fanoutLen) +
		int64(size)*(nameLen+crcLen) +
		int64(i)*offsetLen
	if _, err := fh.Seek(pos, io.SeekStart); err != nil {
		return 0, err
	}

	var offset uint32
	if err := binary.Read(fh, binary.BigEndian, &offset); err != nil {
		return 0, err
	}
	if offset&largeFlag != 0 {
		return 0, errors.New("packfile index entry uses a 64-bit offset, which is not supported")
	}
	// we now have the offset to lookup in the pack
	return offset, nil
}

// findOffsetInIdx looks for sha in the version 2 pack index file at path.
//
// It returns the pack file offset of the object and true when the index
// contains sha, and (0, false, nil) when it does not. An error is returned
// when the file cannot be opened or read, when the magic bytes or the format
// version are wrong, or when the fanout table is inconsistent.
func findOffsetInIdx(sha Sha, path string) (uint32, bool, error) {
	fh, err := os.Open(path)
	if err != nil {
		return 0, false, err
	}
	// the file is only read, so a close error carries no useful information
	defer func() { _ = fh.Close() }()

	// read the magic bytes to check correct
	if err := readMagic(fh); err != nil {
		return 0, false, err
	}

	// read the idx format and assert it is 2
	format, err := readIdxFormat(fh)
	if err != nil {
		return 0, false, err
	}
	if format != 2 {
		return 0, false, errors.New("invalid packfile format, expected 2")
	}

	// read fanout buckets
	fanout, err := readFanout(fh)
	if err != nil {
		return 0, false, err
	}

	// lookup search bounds: names starting with the first byte of sha occupy
	// positions [startOffset, endOffset) of the sorted name table.
	first := sha.hash[0]
	var startOffset uint32
	if first > 0 {
		startOffset = fanout[first-1]
	}
	endOffset := fanout[first]
	size := fanout[255]

	// a fanout that is not monotonic would make the bucket length below wrap
	// around, so reject it up front.
	if endOffset < startOffset || size < endOffset {
		return 0, false, errors.New("invalid packfile index fanout table")
	}

	// to make the search more efficient, we can jump to the start
	// address of this sha 1st byte bucket. The product is computed in int64
	// because it does not fit in a uint32 for very large indexes.
	if _, err := fh.Seek(int64(startOffset)*20, io.SeekCurrent); err != nil {
		return 0, false, err
	}

	i, found, err := findObjectName(endOffset-startOffset, fh, sha)
	if err != nil {
		return 0, false, err
	}
	if !found {
		return 0, false, nil
	}

	// i is relative to the bucket start, the offset table is indexed from 0
	offset, err := readObjectOffset(size, fh, i+startOffset)
	return offset, found, err
}

// findObjectInPack is a placeholder: it ignores both offset and path and
// always returns a nil object together with a nil error.
func findObjectInPack(offset uint32, path string) (*Object, error) {
	var obj *Object
	return obj, nil
}
13 changes: 13 additions & 0 deletions packfile_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package g

import "testing"

func TestLookup(t *testing.T) {
Configure()

Check failure on line 6 in packfile_test.go

View workflow job for this annotation

GitHub Actions / lint

Error return value is not checked (errcheck)
sha, err := ShaFromHexString("d208f34d505adb8914e5a5c577c6db8359173b4e")
//sha, err := ShaFromHexString("f209394e4150235bf75037bba9bf49c671edf091")
if err != nil {
t.Fatal(err)
}
Lookup(sha)

Check failure on line 12 in packfile_test.go

View workflow job for this annotation

GitHub Actions / lint

Error return value is not checked (errcheck)
}

0 comments on commit 77d296f

Please sign in to comment.