about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Runxi Yu, 2026-01-30 17:06:51 +0100
committer: Runxi Yu, 2026-01-30 17:06:51 +0100
commit: 8e320c9ca634e6b2431f9442b7d5191864735ae4 (patch)
tree: b2e3d18144865fad0508a4f890dada5aee6ba940
parent: reachability: Add basic reachability API (diff)
signature: No signature
packed, delta: Implement thin packs
-rw-r--r-- delta_write_select.go 1
-rw-r--r-- packed_write_pack.go 102
-rw-r--r-- packed_write_test.go 212
3 files changed, 306 insertions, 9 deletions
diff --git a/delta_write_select.go b/delta_write_select.go
index 9113b80f..2911867d 100644
--- a/delta_write_select.go
+++ b/delta_write_select.go
@@ -8,6 +8,7 @@ type objectToPack struct {
body []byte
offset uint64
deltaDepth int
+ inPack bool
}
type deltaContext struct {
diff --git a/packed_write_pack.go b/packed_write_pack.go
index 329e2a7f..1d8cbc1e 100644
--- a/packed_write_pack.go
+++ b/packed_write_pack.go
@@ -283,11 +283,39 @@ func packOfsEncode(dist uint64) ([]byte, error) {
// packWrite writes a pack stream for the provided object ids.
func (repo *Repository) packWrite(w io.Writer, objects []Hash, opts packWriteOptions) (Hash, error) {
+ if opts.EnableThinPack {
+ return Hash{}, errThinPackUnimplemented
+ }
+ return repo.packWriteObjects(w, objects, opts, nil)
+}
+
+// packWriteReachable writes a pack stream for objects reachable from the
+// provided reachability query.
+func (repo *Repository) packWriteReachable(w io.Writer, query ReachabilityQuery, opts packWriteOptions) (Hash, error) {
if repo == nil {
return Hash{}, ErrInvalidObject
}
- if opts.EnableThinPack {
- return Hash{}, errThinPackUnimplemented
+ query.Mode = ReachabilityAllObjects
+ walk, err := repo.ReachableObjects(query)
+ if err != nil {
+ return Hash{}, err
+ }
+ var objects []Hash
+ for obj := range walk.Seq() {
+ objects = append(objects, obj.ID)
+ }
+ if err := walk.Err(); err != nil {
+ return Hash{}, err
+ }
+ return repo.packWriteObjects(w, objects, opts, walk)
+}
+
+func (repo *Repository) packWriteObjects(w io.Writer, objects []Hash, opts packWriteOptions, have *ReachabilityWalk) (Hash, error) {
+ if repo == nil {
+ return Hash{}, ErrInvalidObject
+ }
+ if opts.EnableThinPack && have == nil {
+ return Hash{}, ErrInvalidObject
}
if len(objects) > int(^uint32(0)) {
return Hash{}, ErrInvalidObject
@@ -312,15 +340,22 @@ func (repo *Repository) packWrite(w io.Writer, objects []Hash, opts packWriteOpt
deltaSeed = binary.LittleEndian.Uint64(seedBytes[:])
}
+ if opts.EnableDeltas && opts.EnableThinPack {
+ if err := repo.seedDeltaCandidatesFromHaves(&dctx, have.query.Haves); err != nil {
+ return Hash{}, err
+ }
+ }
+
for _, id := range objects {
ty, body, err := repo.ReadObjectTypeRaw(id)
if err != nil {
return Hash{}, err
}
obj := &objectToPack{
- id: id,
- ty: ty,
- body: body,
+ id: id,
+ ty: ty,
+ body: body,
+ inPack: true,
}
startOffset := pw.bytesWritten
wroteDelta := false
@@ -328,11 +363,27 @@ func (repo *Repository) packWrite(w io.Writer, objects []Hash, opts packWriteOpt
if opts.EnableDeltas && ty == ObjectTypeBlob {
base, delta := pickDeltaBase(&dctx, obj, deltaSeed, opts.MinDeltaSavings, opts.MaxDeltaDepth)
if base != nil && delta != nil {
- if err := pw.WriteOfsDelta(base.offset, len(base.body), len(body), delta); err != nil {
- return Hash{}, err
+ switch {
+ case base.inPack:
+ if err := pw.WriteOfsDelta(base.offset, len(base.body), len(body), delta); err != nil {
+ return Hash{}, err
+ }
+ wroteDelta = true
+ obj.deltaDepth = base.deltaDepth + 1
+ case opts.EnableThinPack:
+ inHave, err := have.HaveContains(base.id)
+ if err != nil {
+ return Hash{}, err
+ }
+ if inHave {
+ if err := pw.WriteRefDelta(base.id, len(base.body), len(body), delta); err != nil {
+ return Hash{}, err
+ }
+ wroteDelta = true
+ obj.deltaDepth = base.deltaDepth + 1
+ }
+ default:
}
- wroteDelta = true
- obj.deltaDepth = base.deltaDepth + 1
}
}
if !wroteDelta {
@@ -351,6 +402,39 @@ func (repo *Repository) packWrite(w io.Writer, objects []Hash, opts packWriteOpt
return pw.Close()
}
+// seedDeltaCandidatesFromHaves registers blobs reachable from haves as delta
+// base candidates in ctx. It walks every object reachable from haves, reads
+// each blob, and adds it to ctx as a candidate marked inPack=false.
+//
+// It returns ErrInvalidObject for a nil repo and is a no-op (nil error) when
+// ctx is nil, the delta window is not positive, or haves is empty. Errors from
+// starting the walk, from reading an object, or from the walk itself are
+// returned unchanged.
+func (repo *Repository) seedDeltaCandidatesFromHaves(ctx *deltaContext, haves []Hash) error {
+	if repo == nil {
+		return ErrInvalidObject
+	}
+	nothingToSeed := ctx == nil || ctx.window <= 0 || len(haves) == 0
+	if nothingToSeed {
+		return nil
+	}
+
+	haveWalk, err := repo.ReachableObjects(ReachabilityQuery{Wants: haves, Mode: ReachabilityAllObjects})
+	if err != nil {
+		return err
+	}
+	for entry := range haveWalk.Seq() {
+		// Only blobs are offered as delta bases; everything else is skipped.
+		if entry.Type == ObjectTypeBlob {
+			kind, raw, readErr := repo.ReadObjectTypeRaw(entry.ID)
+			if readErr != nil {
+				return readErr
+			}
+			ctx.addCandidate(&objectToPack{id: entry.ID, ty: kind, body: raw, inPack: false})
+		}
+	}
+	return haveWalk.Err()
+}
+
type packWriteOptions struct {
EnableDeltas bool
EnableThinPack bool
diff --git a/packed_write_test.go b/packed_write_test.go
index ccd81844..82e573b4 100644
--- a/packed_write_test.go
+++ b/packed_write_test.go
@@ -358,6 +358,90 @@ func TestPackWriteDeltas(t *testing.T) {
_ = gitCmd(t, repoPath, "fsck", "--full", "--strict")
}
+// TestPackWriteThinPackReachable commits a 16 KiB file, then a second version
+// that differs by a single byte, and writes a thin pack for the second commit
+// with the first commit as a have. It checks that the resulting stream contains
+// a ref-delta whose base lies outside the pack, and that git can complete the
+// pack with index-pack --fix-thin and pass fsck afterwards.
+func TestPackWriteThinPackReachable(t *testing.T) {
+	repoPath, cleanup := setupTestRepo(t)
+	defer cleanup()
+
+	workDir, cleanupWork := setupWorkDir(t)
+	defer cleanupWork()
+
+	base := bytes.Repeat([]byte("A"), 16384)
+	if err := os.WriteFile(filepath.Join(workDir, "file.txt"), base, 0o644); err != nil {
+		t.Fatalf("write base file: %v", err)
+	}
+	gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".")
+	gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "base")
+	haveHash := gitCmd(t, repoPath, "rev-parse", "HEAD")
+
+	// Copy before mutating so base itself stays untouched.
+	mod := append([]byte(nil), base...)
+	mod[1024] = 'B'
+	if err := os.WriteFile(filepath.Join(workDir, "file.txt"), mod, 0o644); err != nil {
+		t.Fatalf("write mod file: %v", err)
+	}
+	gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".")
+	gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "target")
+	wantHash := gitCmd(t, repoPath, "rev-parse", "HEAD")
+
+	repo, err := OpenRepository(repoPath)
+	if err != nil {
+		t.Fatalf("OpenRepository failed: %v", err)
+	}
+	defer func() { _ = repo.Close() }()
+
+	// Fail fast on unparsable hashes instead of silently querying with zero
+	// values, which would surface later as a confusing pack-writing failure.
+	wantID, err := repo.ParseHash(wantHash)
+	if err != nil {
+		t.Fatalf("parse want hash %q: %v", wantHash, err)
+	}
+	haveID, err := repo.ParseHash(haveHash)
+	if err != nil {
+		t.Fatalf("parse have hash %q: %v", haveHash, err)
+	}
+
+	query := ReachabilityQuery{
+		Wants:       []Hash{wantID},
+		Haves:       []Hash{haveID},
+		Mode:        ReachabilityAllObjects,
+		StopAtHaves: true,
+	}
+	var buf bytes.Buffer
+	if _, err := repo.packWriteReachable(&buf, query, packWriteOptions{
+		EnableDeltas:    true,
+		EnableThinPack:  true,
+		MinDeltaSavings: 1,
+	}); err != nil {
+		t.Fatalf("packWriteReachable failed: %v", err)
+	}
+
+	thinSeen, err := checkThinPackStream(buf.Bytes(), repo)
+	if err != nil {
+		t.Fatalf("thin pack stream invalid: %v", err)
+	}
+	if !thinSeen {
+		t.Fatalf("expected thin pack with ref-delta base outside pack")
+	}
+
+	packDir := filepath.Join(repoPath, "objects", "pack")
+	if err := os.MkdirAll(packDir, 0o755); err != nil {
+		t.Fatalf("failed to create pack dir: %v", err)
+	}
+	packPath := filepath.Join(packDir, "furgit-thin-test.pack")
+	idxPath := strings.TrimSuffix(packPath, ".pack") + ".idx"
+	// Best-effort removal of leftovers; a missing file is not an error here.
+	_ = os.Remove(packPath)
+	_ = os.Remove(idxPath)
+
+	cmd := exec.Command("git", "index-pack", "--stdin", "--fix-thin", "-o", idxPath, packPath)
+	cmd.Dir = repoPath
+	// Point git's global and system config at /dev/null for this command.
+	cmd.Env = append(os.Environ(),
+		"GIT_CONFIG_GLOBAL=/dev/null",
+		"GIT_CONFIG_SYSTEM=/dev/null",
+	)
+	cmd.Stdin = bytes.NewReader(buf.Bytes())
+	output, err := cmd.CombinedOutput()
+	if err != nil {
+		t.Fatalf("git index-pack --fix-thin failed: %v\n%s", err, output)
+	}
+
+	_ = gitCmd(t, repoPath, "cat-file", "-p", wantHash)
+	_ = gitCmd(t, repoPath, "fsck", "--full", "--strict")
+
+	// Best-effort cleanup of the pack and index written above.
+	_ = os.Remove(packPath)
+	_ = os.Remove(idxPath)
+}
+
func checkPackStream(path string, algo hashAlgorithm, objectCount int) error {
data, err := os.ReadFile(path)
if err != nil {
@@ -504,6 +588,134 @@ func checkPackStream(path string, algo hashAlgorithm, objectCount int) error {
return nil
}
+// checkThinPackStream parses an in-memory pack stream and verifies that every
+// entry inflates and, for delta entries, that the delta applies to its base
+// with matching base and result sizes.
+//
+// Ofs-delta bases must be entries already parsed from the stream. Ref-delta
+// bases are looked up among the entries parsed so far and, when absent, read
+// from repo. The returned bool reports whether at least one ref-delta base had
+// to be read from repo; it is meaningful on error as well and reflects what was
+// seen up to the failure.
+//
+// Only the 12-byte header and the object entries are examined; bytes after the
+// last entry are not inspected.
+func checkThinPackStream(data []byte, repo *Repository) (bool, error) {
+	if repo == nil {
+		return false, ErrInvalidObject
+	}
+	if len(data) < 12 {
+		return false, ErrInvalidObject
+	}
+	if binary.BigEndian.Uint32(data[0:4]) != packMagic || binary.BigEndian.Uint32(data[4:8]) != packVersion2 {
+		return false, ErrInvalidObject
+	}
+	count := int(binary.BigEndian.Uint32(data[8:12]))
+	pos := 12
+	hashSize := repo.hashAlgo.Size()
+	type objEntry struct {
+		offset uint64
+		ty     ObjectType
+		body   []byte
+	}
+	// Resolved entries, indexed both ways so later deltas can find their base
+	// by pack offset (ofs-delta) or by object hash (ref-delta).
+	byOffset := make(map[uint64]objEntry, count)
+	byHash := make(map[string]objEntry, count)
+	thinSeen := false
+
+	for i := 0; i < count; i++ {
+		objOffset := uint64(pos)
+		ty, size, consumed, err := packHeaderParse(data[pos:])
+		if err != nil {
+			return thinSeen, fmt.Errorf("obj %d header at %d: %w", i, pos, err)
+		}
+		pos += consumed
+		baseTy := ObjectTypeInvalid
+		var baseBody []byte
+		switch ty {
+		case ObjectTypeOfsDelta:
+			dist, distConsumed, err := packDeltaReadOfsDistance(data[pos:])
+			if err != nil {
+				return thinSeen, fmt.Errorf("obj %d ofs at %d: %w", i, pos, err)
+			}
+			pos += distConsumed
+			// The base must lie strictly before this entry in the stream.
+			if dist == 0 || dist > objOffset {
+				return thinSeen, fmt.Errorf("obj %d ofs at %d: invalid dist", i, pos)
+			}
+			baseOffset := objOffset - dist
+			base, ok := byOffset[baseOffset]
+			if !ok {
+				return thinSeen, fmt.Errorf("obj %d ofs at %d: missing base", i, pos)
+			}
+			baseTy = base.ty
+			baseBody = base.body
+		case ObjectTypeRefDelta:
+			if pos+hashSize > len(data) {
+				return thinSeen, ErrInvalidObject
+			}
+			var baseHash Hash
+			copy(baseHash.data[:], data[pos:pos+hashSize])
+			baseHash.algo = repo.hashAlgo
+			baseEntry, ok := byHash[baseHash.String()]
+			if ok {
+				baseTy = baseEntry.ty
+				baseBody = baseEntry.body
+			} else {
+				// Base is not among the entries parsed so far: treat it as
+				// external to the pack and fetch it from the repository.
+				thinSeen = true
+				extTy, extBody, readErr := repo.ReadObjectTypeRaw(baseHash)
+				if readErr != nil {
+					return thinSeen, readErr
+				}
+				baseTy = extTy
+				baseBody = extBody
+			}
+			pos += hashSize
+		default:
+		}
+
+		payloadBuf, zconsumed, err := zlibx.DecompressSized(data[pos:], size)
+		if err != nil {
+			return thinSeen, fmt.Errorf("obj %d zlib at %d: %w", i, pos, err)
+		}
+		// Copy out of the pooled buffer before releasing it.
+		payload := append([]byte(nil), payloadBuf.Bytes()...)
+		payloadBuf.Release()
+		pos += zconsumed
+		switch ty {
+		case ObjectTypeOfsDelta, ObjectTypeRefDelta:
+			if baseBody == nil {
+				return thinSeen, fmt.Errorf("obj %d missing base body", i)
+			}
+			// Separate cursor into the delta payload; the stream cursor pos
+			// must not be disturbed here.
+			deltaPos := 0
+			baseSize, err := packVarintRead(payload, &deltaPos)
+			if err != nil {
+				return thinSeen, fmt.Errorf("obj %d delta base size: %w", i, err)
+			}
+			resultSize, err := packVarintRead(payload, &deltaPos)
+			if err != nil {
+				return thinSeen, fmt.Errorf("obj %d delta result size: %w", i, err)
+			}
+			if baseSize != len(baseBody) {
+				return thinSeen, fmt.Errorf("obj %d delta base size mismatch: got %d want %d", i, baseSize, len(baseBody))
+			}
+			out, err := packDeltaApply(bufpool.FromOwned(baseBody), bufpool.FromOwned(payload))
+			if err != nil {
+				return thinSeen, fmt.Errorf("obj %d delta apply: %w", i, err)
+			}
+			body := append([]byte(nil), out.Bytes()...)
+			out.Release()
+			if resultSize != len(body) {
+				return thinSeen, fmt.Errorf("obj %d delta result size mismatch: got %d want %d", i, len(body), resultSize)
+			}
+			// A resolved delta is recorded under its base's object type.
+			byOffset[objOffset] = objEntry{offset: objOffset, ty: baseTy, body: body}
+		default:
+			if size >= 0 && len(payload) != size {
+				return thinSeen, fmt.Errorf("obj %d size mismatch: got %d want %d", i, len(payload), size)
+			}
+			body := append([]byte(nil), payload...)
+			byOffset[objOffset] = objEntry{offset: objOffset, ty: ty, body: body}
+		}
+
+		// Index the resolved object by hash so later ref-deltas can find it.
+		entry := byOffset[objOffset]
+		if entry.body != nil && entry.ty != ObjectTypeInvalid {
+			hdr, err := headerForType(entry.ty, entry.body)
+			if err != nil {
+				return thinSeen, err
+			}
+			raw := append(hdr, entry.body...)
+			hash := repo.hashAlgo.Sum(raw)
+			byHash[hash.String()] = entry
+		}
+	}
+	return thinSeen, nil
+}
+
func removeLooseObject(repoPath, oid string) error {
if len(oid) < 2 {
return ErrInvalidObject