diff options
| author | 2026-01-30 17:06:51 +0100 | |
|---|---|---|
| committer | 2026-01-30 17:06:51 +0100 | |
| commit | 8e320c9ca634e6b2431f9442b7d5191864735ae4 (patch) | |
| tree | b2e3d18144865fad0508a4f890dada5aee6ba940 | |
| parent | reachability: Add basic reachability API (diff) | |
| signature | No signature | |
packed, delta: Implement thin packs
| -rw-r--r-- | delta_write_select.go | 1 | ||||
| -rw-r--r-- | packed_write_pack.go | 102 | ||||
| -rw-r--r-- | packed_write_test.go | 212 |
3 files changed, 306 insertions, 9 deletions
diff --git a/delta_write_select.go b/delta_write_select.go index 9113b80f..2911867d 100644 --- a/delta_write_select.go +++ b/delta_write_select.go @@ -8,6 +8,7 @@ type objectToPack struct { body []byte offset uint64 deltaDepth int + inPack bool } type deltaContext struct { diff --git a/packed_write_pack.go b/packed_write_pack.go index 329e2a7f..1d8cbc1e 100644 --- a/packed_write_pack.go +++ b/packed_write_pack.go @@ -283,11 +283,39 @@ func packOfsEncode(dist uint64) ([]byte, error) { // packWrite writes a pack stream for the provided object ids. func (repo *Repository) packWrite(w io.Writer, objects []Hash, opts packWriteOptions) (Hash, error) { + if opts.EnableThinPack { + return Hash{}, errThinPackUnimplemented + } + return repo.packWriteObjects(w, objects, opts, nil) +} + +// packWriteReachable writes a pack stream for objects reachable from the +// provided reachability query. +func (repo *Repository) packWriteReachable(w io.Writer, query ReachabilityQuery, opts packWriteOptions) (Hash, error) { if repo == nil { return Hash{}, ErrInvalidObject } - if opts.EnableThinPack { - return Hash{}, errThinPackUnimplemented + query.Mode = ReachabilityAllObjects + walk, err := repo.ReachableObjects(query) + if err != nil { + return Hash{}, err + } + var objects []Hash + for obj := range walk.Seq() { + objects = append(objects, obj.ID) + } + if err := walk.Err(); err != nil { + return Hash{}, err + } + return repo.packWriteObjects(w, objects, opts, walk) +} + +func (repo *Repository) packWriteObjects(w io.Writer, objects []Hash, opts packWriteOptions, have *ReachabilityWalk) (Hash, error) { + if repo == nil { + return Hash{}, ErrInvalidObject + } + if opts.EnableThinPack && have == nil { + return Hash{}, ErrInvalidObject } if len(objects) > int(^uint32(0)) { return Hash{}, ErrInvalidObject @@ -312,15 +340,22 @@ func (repo *Repository) packWrite(w io.Writer, objects []Hash, opts packWriteOpt deltaSeed = binary.LittleEndian.Uint64(seedBytes[:]) } + if opts.EnableDeltas && opts.EnableThinPack { + if err := repo.seedDeltaCandidatesFromHaves(&dctx, have.query.Haves); err != nil { + return Hash{}, err + } + } + for _, id := range objects { ty, body, err := repo.ReadObjectTypeRaw(id) if err != nil { return Hash{}, err } obj := &objectToPack{ - id: id, - ty: ty, - body: body, + id: id, + ty: ty, + body: body, + inPack: true, } startOffset := pw.bytesWritten wroteDelta := false @@ -328,11 +363,27 @@ func (repo *Repository) packWrite(w io.Writer, objects []Hash, opts packWriteOpt if opts.EnableDeltas && ty == ObjectTypeBlob { base, delta := pickDeltaBase(&dctx, obj, deltaSeed, opts.MinDeltaSavings, opts.MaxDeltaDepth) if base != nil && delta != nil { - if err := pw.WriteOfsDelta(base.offset, len(base.body), len(body), delta); err != nil { - return Hash{}, err + switch { + case base.inPack: + if err := pw.WriteOfsDelta(base.offset, len(base.body), len(body), delta); err != nil { + return Hash{}, err + } + wroteDelta = true + obj.deltaDepth = base.deltaDepth + 1 + case opts.EnableThinPack: + inHave, err := have.HaveContains(base.id) + if err != nil { + return Hash{}, err + } + if inHave { + if err := pw.WriteRefDelta(base.id, len(base.body), len(body), delta); err != nil { + return Hash{}, err + } + wroteDelta = true + obj.deltaDepth = base.deltaDepth + 1 + } + default: } - wroteDelta = true - obj.deltaDepth = base.deltaDepth + 1 } } if !wroteDelta { @@ -351,6 +402,39 @@ func (repo *Repository) packWrite(w io.Writer, objects []Hash, opts packWriteOpt return pw.Close() } +func (repo *Repository) seedDeltaCandidatesFromHaves(ctx *deltaContext, haves []Hash) error { + if repo == nil { + return ErrInvalidObject + } + if ctx == nil || ctx.window <= 0 || len(haves) == 0 { + return nil + } + walk, err := repo.ReachableObjects(ReachabilityQuery{ + Wants: haves, + Mode: ReachabilityAllObjects, + }) + if err != nil { + return err + } + for obj := range walk.Seq() { + if obj.Type != ObjectTypeBlob { + continue + } + ty, body, err := repo.ReadObjectTypeRaw(obj.ID) + if err != nil { + return err + } + candidate := &objectToPack{ + id: obj.ID, + ty: ty, + body: body, + inPack: false, + } + ctx.addCandidate(candidate) + } + return walk.Err() +} + type packWriteOptions struct { EnableDeltas bool EnableThinPack bool diff --git a/packed_write_test.go b/packed_write_test.go index ccd81844..82e573b4 100644 --- a/packed_write_test.go +++ b/packed_write_test.go @@ -358,6 +358,90 @@ func TestPackWriteDeltas(t *testing.T) { _ = gitCmd(t, repoPath, "fsck", "--full", "--strict") } +func TestPackWriteThinPackReachable(t *testing.T) { + repoPath, cleanup := setupTestRepo(t) + defer cleanup() + + workDir, cleanupWork := setupWorkDir(t) + defer cleanupWork() + + base := bytes.Repeat([]byte("A"), 16384) + if err := os.WriteFile(filepath.Join(workDir, "file.txt"), base, 0o644); err != nil { + t.Fatalf("write base file: %v", err) + } + gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") + gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "base") + haveHash := gitCmd(t, repoPath, "rev-parse", "HEAD") + + mod := append([]byte(nil), base...) + mod[1024] = 'B' + if err := os.WriteFile(filepath.Join(workDir, "file.txt"), mod, 0o644); err != nil { + t.Fatalf("write mod file: %v", err) + } + gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") + gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "target") + wantHash := gitCmd(t, repoPath, "rev-parse", "HEAD") + + repo, err := OpenRepository(repoPath) + if err != nil { + t.Fatalf("OpenRepository failed: %v", err) + } + defer func() { _ = repo.Close() }() + + wantID, _ := repo.ParseHash(wantHash) + haveID, _ := repo.ParseHash(haveHash) + + query := ReachabilityQuery{ + Wants: []Hash{wantID}, + Haves: []Hash{haveID}, + Mode: ReachabilityAllObjects, + StopAtHaves: true, + } + var buf bytes.Buffer + if _, err := repo.packWriteReachable(&buf, query, packWriteOptions{ + EnableDeltas: true, + EnableThinPack: true, + MinDeltaSavings: 1, + }); err != nil { + t.Fatalf("packWriteReachable failed: %v", err) + } + + thinSeen, err := checkThinPackStream(buf.Bytes(), repo) + if err != nil { + t.Fatalf("thin pack stream invalid: %v", err) + } + if !thinSeen { + t.Fatalf("expected thin pack with ref-delta base outside pack") + } + + packDir := filepath.Join(repoPath, "objects", "pack") + if err := os.MkdirAll(packDir, 0o755); err != nil { + t.Fatalf("failed to create pack dir: %v", err) + } + packPath := filepath.Join(packDir, "furgit-thin-test.pack") + idxPath := strings.TrimSuffix(packPath, ".pack") + ".idx" + _ = os.Remove(packPath) + _ = os.Remove(idxPath) + + cmd := exec.Command("git", "index-pack", "--stdin", "--fix-thin", "-o", idxPath, packPath) + cmd.Dir = repoPath + cmd.Env = append(os.Environ(), + "GIT_CONFIG_GLOBAL=/dev/null", + "GIT_CONFIG_SYSTEM=/dev/null", + ) + cmd.Stdin = bytes.NewReader(buf.Bytes()) + output, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("git index-pack --fix-thin failed: %v\n%s", err, output) + } + + _ = gitCmd(t, repoPath, "cat-file", "-p", wantHash) + _ = gitCmd(t, repoPath, "fsck", "--full", "--strict") + + _ = os.Remove(packPath) + _ = os.Remove(idxPath) +} + func checkPackStream(path string, algo hashAlgorithm, objectCount int) error { data, err := os.ReadFile(path) if err != nil { @@ -504,6 +588,134 @@ func checkPackStream(path string, algo hashAlgorithm, objectCount int) error { return nil } +func checkThinPackStream(data []byte, repo *Repository) (bool, error) { + if repo == nil { + return false, ErrInvalidObject + } + if len(data) < 12 { + return false, ErrInvalidObject + } + if binary.BigEndian.Uint32(data[0:4]) != packMagic || binary.BigEndian.Uint32(data[4:8]) != packVersion2 { + return false, ErrInvalidObject + } + count := int(binary.BigEndian.Uint32(data[8:12])) + pos := 12 + hashSize := repo.hashAlgo.Size() + type objEntry struct { + offset uint64 + ty ObjectType + body []byte + } + byOffset := make(map[uint64]objEntry, count) + byHash := make(map[string]objEntry, count) + thinSeen := false + + for i := 0; i < count; i++ { + objOffset := uint64(pos) + ty, size, consumed, err := packHeaderParse(data[pos:]) + if err != nil { + return thinSeen, fmt.Errorf("obj %d header at %d: %v", i, pos, err) + } + pos += consumed + baseTy := ObjectTypeInvalid + var baseBody []byte + switch ty { + case ObjectTypeOfsDelta: + dist, distConsumed, err := packDeltaReadOfsDistance(data[pos:]) + if err != nil { + return thinSeen, fmt.Errorf("obj %d ofs at %d: %v", i, pos, err) + } + pos += distConsumed + if dist == 0 || dist > objOffset { + return thinSeen, fmt.Errorf("obj %d ofs at %d: invalid dist", i, pos) + } + baseOffset := objOffset - dist + base, ok := byOffset[baseOffset] + if !ok { + return thinSeen, fmt.Errorf("obj %d ofs at %d: missing base", i, pos) + } + baseTy = base.ty + baseBody = base.body + case ObjectTypeRefDelta: + if pos+hashSize > len(data) { + return thinSeen, ErrInvalidObject + } + var baseHash Hash + copy(baseHash.data[:], data[pos:pos+hashSize]) + baseHash.algo = repo.hashAlgo + baseEntry, ok := byHash[baseHash.String()] + if ok { + baseTy = baseEntry.ty + baseBody = baseEntry.body + } else { + thinSeen = true + ty, body, err := repo.ReadObjectTypeRaw(baseHash) + if err != nil { + return thinSeen, err + } + baseTy = ty + baseBody = body + } + pos += hashSize + default: + } + + payloadBuf, zconsumed, err := zlibx.DecompressSized(data[pos:], size) + if err != nil { + return thinSeen, fmt.Errorf("obj %d zlib at %d: %v", i, pos, err) + } + payload := append([]byte(nil), payloadBuf.Bytes()...) + payloadBuf.Release() + pos += zconsumed + switch ty { + case ObjectTypeOfsDelta, ObjectTypeRefDelta: + if baseBody == nil { + return thinSeen, fmt.Errorf("obj %d missing base body", i) + } + pos := 0 + baseSize, err := packVarintRead(payload, &pos) + if err != nil { + return thinSeen, fmt.Errorf("obj %d delta base size: %v", i, err) + } + resultSize, err := packVarintRead(payload, &pos) + if err != nil { + return thinSeen, fmt.Errorf("obj %d delta result size: %v", i, err) + } + if baseSize != len(baseBody) { + return thinSeen, fmt.Errorf("obj %d delta base size mismatch: got %d want %d", i, baseSize, len(baseBody)) + } + out, err := packDeltaApply(bufpool.FromOwned(baseBody), bufpool.FromOwned(payload)) + if err != nil { + return thinSeen, fmt.Errorf("obj %d delta apply: %v", i, err) + } + body := append([]byte(nil), out.Bytes()...) + out.Release() + if resultSize != len(body) { + return thinSeen, fmt.Errorf("obj %d delta result size mismatch: got %d want %d", i, len(body), resultSize) + } + byOffset[objOffset] = objEntry{offset: objOffset, ty: baseTy, body: body} + default: + if size >= 0 && len(payload) != size { + return thinSeen, fmt.Errorf("obj %d size mismatch: got %d want %d", i, len(payload), size) + } + body := append([]byte(nil), payload...) + byOffset[objOffset] = objEntry{offset: objOffset, ty: ty, body: body} + } + + entry := byOffset[objOffset] + if entry.body != nil && entry.ty != ObjectTypeInvalid { + hdr, err := headerForType(entry.ty, entry.body) + if err != nil { + return thinSeen, err + } + raw := append(hdr, entry.body...) + hash := repo.hashAlgo.Sum(raw) + byHash[hash.String()] = entry + } + } + return thinSeen, nil +} + func removeLooseObject(repoPath, oid string) error { if len(oid) < 2 { return ErrInvalidObject |
