diff options
| author | 2026-01-30 17:27:10 +0100 | |
|---|---|---|
| committer | 2026-01-30 17:27:10 +0100 | |
| commit | c1f17baa57bad0f61e639fc39c8cd5e4872142f6 (patch) | |
| tree | 715935cbfee3358c6e23008680d80473e4a21061 | |
| parent | README: Update packfile/thin/delta related feature items (diff) | |
| signature | No signature | |
packed, delta: Some improvements for base selection
* sort objects by type, then by size (largest first), to improve window quality
* prune bad candidates with size ratio checks
* prefer objects seeded from the haves as delta bases
| -rw-r--r-- | README.md | 1 | ||||
| -rw-r--r-- | delta_write_select.go | 50 | ||||
| -rw-r--r-- | packed_write_pack.go | 66 |
3 files changed, 109 insertions, 8 deletions
@@ -30,6 +30,7 @@ Furgit is a fast Git library in pure Go * Multi pack indexes * Repack * Better delta base selection strategy +* Delta reuse; delta islands * Reading reachability bitmaps * Writing reachability bitmaps when writing packfiles * [commit-graph](https://git-scm.com/docs/commit-graph) (in progress) diff --git a/delta_write_select.go b/delta_write_select.go index 2911867d..db6fc022 100644 --- a/delta_write_select.go +++ b/delta_write_select.go @@ -6,9 +6,11 @@ type objectToPack struct { id Hash ty ObjectType body []byte + size int offset uint64 deltaDepth int inPack bool + preferred bool } type deltaContext struct { @@ -36,6 +38,8 @@ func pickDeltaBase(ctx *deltaContext, obj *objectToPack, seed uint64, minSavings } var bestBase *objectToPack var bestDelta []byte + var bestPreferred *objectToPack + var bestPreferredDelta []byte for i := len(ctx.candidates) - 1; i >= 0; i-- { base := ctx.candidates[i] if base.ty != ObjectTypeBlob { @@ -44,14 +48,60 @@ func pickDeltaBase(ctx *deltaContext, obj *objectToPack, seed uint64, minSavings if base.deltaDepth >= maxDepth { continue } + if !deltaSizeOk(base, obj, maxDepth) { + continue + } delta, ok := deltaTry(base.body, obj.body, seed, minSavings) + if base.preferred { + delta, ok = deltaTry(base.body, obj.body, seed, 0) + } if !ok { continue } + if base.preferred { + if bestPreferredDelta == nil || len(delta) < len(bestPreferredDelta) { + bestPreferredDelta = delta + bestPreferred = base + } + continue + } if bestDelta == nil || len(delta) < len(bestDelta) { bestDelta = delta bestBase = base } } + if bestPreferred != nil { + return bestPreferred, bestPreferredDelta + } return bestBase, bestDelta } + +func deltaSizeOk(base, target *objectToPack, maxDepth int) bool { + if base == nil || target == nil { + return false + } + if base.size <= 0 || target.size <= 0 { + return false + } + if maxDepth <= 0 { + maxDepth = 1 + } + if base.deltaDepth >= maxDepth { + return false + } + if target.size < base.size/32 { + return 
false + } + maxSize := target.size/2 - 32 + if maxSize <= 0 { + return false + } + sizediff := 0 + if base.size < target.size { + sizediff = target.size - base.size + } + if sizediff >= maxSize { + return false + } + return true +} diff --git a/packed_write_pack.go b/packed_write_pack.go index 504c1087..2d3eb4f6 100644 --- a/packed_write_pack.go +++ b/packed_write_pack.go @@ -5,6 +5,7 @@ import ( "encoding/binary" "hash" "io" + "sort" "codeberg.org/lindenii/furgit/internal/zlib" ) @@ -310,7 +311,12 @@ func (repo *Repository) packWrite(w io.Writer, objects []Hash, opts packWriteOpt return Hash{}, ErrInvalidObject } - pw, err := newPackWriter(w, repo.hashAlgo, uint32(len(objects))) + objInfos, err := repo.packBuildObjectList(objects, opts.EnableDeltas) + if err != nil { + return Hash{}, err + } + + pw, err := newPackWriter(w, repo.hashAlgo, uint32(len(objInfos))) if err != nil { return Hash{}, err } @@ -335,15 +341,16 @@ func (repo *Repository) packWrite(w io.Writer, objects []Hash, opts packWriteOpt } } - for _, id := range objects { - ty, body, err := repo.ReadObjectTypeRaw(id) + for _, info := range objInfos { + ty, body, err := repo.ReadObjectTypeRaw(info.id) if err != nil { return Hash{}, err } obj := &objectToPack{ - id: id, + id: info.id, ty: ty, body: body, + size: info.size, inPack: true, } startOffset := pw.bytesWritten @@ -414,16 +421,59 @@ func (repo *Repository) seedDeltaCandidatesFromHaves(ctx *deltaContext, haves [] return err } candidate := &objectToPack{ - id: obj.ID, - ty: ty, - body: body, - inPack: false, + id: obj.ID, + ty: ty, + body: body, + size: len(body), + inPack: false, + preferred: true, } ctx.addCandidate(candidate) } return walk.Err() } +type packObjectInfo struct { + id Hash + ty ObjectType + size int + index int +} + +func (repo *Repository) packBuildObjectList(objects []Hash, enableDeltas bool) ([]packObjectInfo, error) { + if repo == nil { + return nil, ErrInvalidObject + } + infos := make([]packObjectInfo, 0, len(objects)) + for i, 
id := range objects { + ty, size, err := repo.ReadObjectTypeSize(id) + if err != nil { + return nil, err + } + infos = append(infos, packObjectInfo{ + id: id, + ty: ty, + size: int(size), + index: i, + }) + } + if !enableDeltas { + return infos, nil + } + sort.SliceStable(infos, func(i, j int) bool { + ai := infos[i] + aj := infos[j] + if ai.ty != aj.ty { + return ai.ty < aj.ty + } + if ai.size != aj.size { + return ai.size > aj.size + } + return ai.index < aj.index + }) + return infos, nil +} + type packWriteOptions struct { EnableDeltas bool EnableThinPack bool |
