diff options
| author | 2026-06-14 11:58:31 +0000 | |
|---|---|---|
| committer | 2026-06-14 11:58:31 +0000 | |
| commit | 64ebbb5e32ee2f44bdd09fae5bb1153dfd1aad22 (patch) | |
| tree | 13516210dd369d9bd6a067f22826fac70d481a61 | |
| parent | object/tree: Add nonadjacent conflicting tree blob test (diff) | |
internal/format/packidx: Lookup interpolation heuristic
On my copy of linux.git with a few packs, -.32 on all-miss lookups, -.15
on all-hit lookups, -.05 on walking all objects in the root tree.
For microbenchmarks, -.4 on honest workloads, and +.85 on fully
adversarial workloads. Might be able to improve the adversarial one in
the future, but it's a rare case not worth optimizing too much for.
| -rw-r--r-- | internal/format/packidx/lookup.go | 47 |
1 files changed, 47 insertions, 0 deletions
diff --git a/internal/format/packidx/lookup.go b/internal/format/packidx/lookup.go index d1293f47..a9196aff 100644 --- a/internal/format/packidx/lookup.go +++ b/internal/format/packidx/lookup.go @@ -4,6 +4,7 @@ import ( "bytes" "encoding/binary" "fmt" + "math/bits" ) // Lookup searches the index for one object ID @@ -18,6 +19,52 @@ func (idx *Packidx) Lookup(oid []byte) (offset uint64, found bool, err error) { lo, hi := idx.fanoutRange(oid[0]) + // Object IDs are uniform for honest inputs, + // interp on next 8 octets converges in O(log log n). + // + // OIDs are public unkeyed hashes, + // so an attacker may sample/filter/mine prefix clusters, + // making interpolation mis-estimate every probe. + // See https://runxiyu.org/comp/ch4ht/ + // for why cryptographic hash algorithms are insufficient. + // + // Cap the interp at bisect's probe count and finish by bisect; + // adversarial at O(log n) plus small interpolation overhead, + // honest exit well under the cap. + target := binary.BigEndian.Uint64(oid[1:9]) + + for budget := bits.Len(uint(hi - lo)); hi-lo > 8 && budget > 0; budget-- { + loKey := binary.BigEndian.Uint64(idx.OIDAt(lo)[1:9]) + hiKey := binary.BigEndian.Uint64(idx.OIDAt(hi - 1)[1:9]) + + var mid int + + switch { + case target <= loKey: + mid = lo + case target >= hiKey: + mid = hi - 1 + default: + frac := float64(target-loKey) / float64(hiKey-loKey) + mid = lo + int(frac*float64(hi-lo-1)) + } + + switch cmp := bytes.Compare(oid, idx.OIDAt(mid)); { + case cmp == 0: + offset, err = idx.OffsetAt(mid) + if err != nil { + return 0, false, err + } + + return offset, true, nil + case cmp < 0: + hi = mid + default: + lo = mid + 1 + } + } + + // Interpolation narrowed or capped; bisect to finish. for lo < hi { mid := lo + (hi-lo)/2 |
