aboutsummaryrefslogtreecommitdiff
path: root/format/pack/ingest/thin_fix.go
diff options
context:
space:
mode:
authorGravatar Runxi Yu2026-03-05 18:24:40 +0800
committerGravatar Runxi Yu2026-03-05 19:05:47 +0800
commit57f1818d547f2f1dca38033b4e29f62d89ef80f9 (patch)
tree88d55ac38e2427860bf380c8cce42fcb3bb1e9ee /format/pack/ingest/thin_fix.go
parentinternal/compress/zlib: Use flate's compression consumed counter (diff)
signatureNo signature
format/pack/ingest: Init
Diffstat (limited to 'format/pack/ingest/thin_fix.go')
-rw-r--r--format/pack/ingest/thin_fix.go211
1 files changed, 211 insertions, 0 deletions
diff --git a/format/pack/ingest/thin_fix.go b/format/pack/ingest/thin_fix.go
new file mode 100644
index 00000000..e605c3f2
--- /dev/null
+++ b/format/pack/ingest/thin_fix.go
@@ -0,0 +1,211 @@
+package ingest
+
+import (
+ "encoding/binary"
+ "fmt"
+ "hash/crc32"
+ "io"
+ "os"
+
+ "codeberg.org/lindenii/furgit/internal/compress/zlib"
+ "codeberg.org/lindenii/furgit/objectid"
+ "codeberg.org/lindenii/furgit/objecttype"
+)
+
+// maybeFixThin completes a thin pack in place when ref-deltas are still
+// unresolved: it cuts off the existing trailer hash, appends every base
+// object that state.base can supply, and then rewrites the object count and
+// trailer hash.
+//
+// It returns nil right away when nothing is unresolved, and
+// *ErrThinPackUnresolved when thin fixing is disabled or no base store is set.
+func maybeFixThin(state *ingestState) error {
+	unresolved := len(state.unresolvedRefDeltas)
+	if unresolved == 0 {
+		return nil
+	}
+	if !state.fixThin || state.base == nil {
+		return &ErrThinPackUnresolved{Count: unresolved}
+	}
+
+	// Drop the old trailer hash so appended entries start where it used to be.
+	trailerLen := int64(state.algo.Size())
+	stat, err := state.packFile.Stat()
+	if err != nil {
+		return err
+	}
+	total := stat.Size()
+	if total < trailerLen {
+		return fmt.Errorf("format/pack/ingest: pack too short to trim trailer")
+	}
+	bodyEnd := total - trailerLen
+	if err = state.packFile.Truncate(bodyEnd); err != nil {
+		return err
+	}
+	state.stream.offset = uint64(bodyEnd)
+
+	for _, baseID := range unresolvedThinBaseIDs(state) {
+		// A base that cannot be read is skipped rather than treated as fatal.
+		ty, content, readErr := state.base.ReadBytesContent(baseID)
+		if readErr != nil {
+			continue
+		}
+		if _, appendErr := appendBaseObject(state, baseID, ty, content); appendErr != nil {
+			return appendErr
+		}
+		state.thinFixed = true
+	}
+
+	// The header and trailer are rewritten even when no base was appended,
+	// because the trailer was already truncated above.
+	return rewritePackHeaderAndTrailer(state)
+}
+
+// appendBaseObject writes content at the current stream offset as a non-delta
+// pack entry: an entry header followed by the zlib-compressed content. It then
+// advances state.stream.offset past the entry, registers a resolved record for
+// it in the lookup tables and the base cache, and returns the record's index.
+func appendBaseObject(state *ingestState, id objectid.ObjectID, realType objecttype.Type, content []byte) (int, error) {
+	entryOff := state.stream.offset
+	hdr := encodePackEntryHeader(realType, int64(len(content)))
+	if _, err := state.packFile.WriteAt(hdr, int64(entryOff)); err != nil {
+		return 0, err
+	}
+
+	// The checksum covers the entry header plus the compressed bytes.
+	sum := crc32.NewIEEE()
+	_, _ = sum.Write(hdr)
+
+	// Compressed output lands in the file right after the header and is
+	// counted and checksummed as it is produced.
+	body := &countingWriter{
+		dst: &fileSectionWriter{file: state.packFile, off: int64(entryOff) + int64(len(hdr))},
+	}
+	deflater := zlib.NewWriter(io.MultiWriter(body, sum))
+	if _, err := deflater.Write(content); err != nil {
+		return 0, err
+	}
+	if err := deflater.Close(); err != nil {
+		return 0, err
+	}
+
+	entryLen := uint64(len(hdr)) + uint64(body.n)
+	state.stream.offset = entryOff + entryLen
+
+	idx := len(state.records)
+	state.records = append(state.records, objectRecord{
+		offset:       entryOff,
+		headerLen:    uint32(len(hdr)),
+		packedLen:    entryLen,
+		crc32:        sum.Sum32(),
+		packedType:   realType,
+		realType:     realType,
+		declaredSize: int64(len(content)),
+		dataOffset:   entryOff + uint64(len(hdr)),
+		objectID:     id,
+		resolved:     true,
+	})
+	state.offsetToRecord[entryOff] = idx
+	state.objectToRecord[id.String()] = idx
+	state.baseCache.add(idx, realType, content)
+
+	return idx, nil
+}
+
+// fileSectionWriter is a sequential writer over a region of file: every Write
+// goes through WriteAt at off+pos, and pos advances by the bytes written.
+type fileSectionWriter struct {
+	file *os.File
+	off  int64
+	pos  int64
+}
+
+// Write stores src at the section's current position and moves the position
+// forward by the number of bytes WriteAt reports. An empty src returns
+// (0, nil) without touching the file.
+func (w *fileSectionWriter) Write(src []byte) (n int, err error) {
+	if len(src) == 0 {
+		return 0, nil
+	}
+
+	at := w.off + w.pos
+	n, err = w.file.WriteAt(src, at)
+	w.pos += int64(n)
+
+	return n, err
+}
+
+// countingWriter forwards writes to dst and accumulates in n the number of
+// bytes dst reported as written.
+type countingWriter struct {
+	dst io.Writer
+	n   int
+}
+
+// Write passes src to dst, adds the reported byte count to the running total
+// (also on a short or failed write), and returns dst's result unchanged.
+func (c *countingWriter) Write(src []byte) (written int, err error) {
+	written, err = c.dst.Write(src)
+	c.n += written
+
+	return written, err
+}
+
+// rewritePackHeaderAndTrailer makes the on-disk pack consistent with
+// state.records using only ReadAt/WriteAt on state.packFile.
+//
+// It stores len(state.records) as a big-endian 32-bit object count at byte
+// offset 8, hashes the whole file with state.algo, and writes the digest at
+// the end of the file as the new trailer. The current file size is taken as
+// the end of pack data, so any previous trailer must already have been
+// removed by the caller.
+//
+// On success it updates state.packHash, state.objectCountHeader and
+// state.stream.offset (which then points just past the trailer). It returns
+// an error if the record count does not fit in 32 bits, or if any file or
+// hash operation fails.
+func rewritePackHeaderAndTrailer(state *ingestState) error {
+	// The header's object count field is 32 bits wide; fail loudly instead of
+	// letting the conversion wrap and write a count that disagrees with the
+	// records.
+	recordCount := len(state.records)
+	if uint64(recordCount) > 0xffffffff {
+		return fmt.Errorf("format/pack/ingest: object count %d does not fit in pack header", recordCount)
+	}
+	objectCount := uint32(recordCount)
+
+	var countRaw [4]byte
+	binary.BigEndian.PutUint32(countRaw[:], objectCount)
+	if _, err := state.packFile.WriteAt(countRaw[:], 8); err != nil {
+		return err
+	}
+
+	info, err := state.packFile.Stat()
+	if err != nil {
+		return err
+	}
+	endWithoutTrailer := info.Size()
+
+	hashImpl, err := state.algo.New()
+	if err != nil {
+		return err
+	}
+	var (
+		buf [128 << 10]byte
+		pos int64
+	)
+	// Hash the file from offset 0 up to the current end in fixed-size chunks.
+	for pos < endWithoutTrailer {
+		want := int64(len(buf))
+		remaining := endWithoutTrailer - pos
+		if remaining < want {
+			want = remaining
+		}
+		n, err := state.packFile.ReadAt(buf[:want], pos)
+		if err != nil && err != io.EOF {
+			return err
+		}
+		// No progress means the file is shorter than Stat reported.
+		if n == 0 {
+			return io.ErrUnexpectedEOF
+		}
+		// hash.Hash documents that Write never returns an error.
+		_, _ = hashImpl.Write(buf[:n])
+		pos += int64(n)
+	}
+
+	sum := hashImpl.Sum(nil)
+	if _, err := state.packFile.WriteAt(sum, endWithoutTrailer); err != nil {
+		return err
+	}
+
+	packHash, err := objectid.FromBytes(state.algo, sum)
+	if err != nil {
+		return err
+	}
+	state.packHash = packHash
+	state.objectCountHeader = objectCount
+	state.stream.offset = uint64(endWithoutTrailer + int64(len(sum)))
+
+	return nil
+}
+
+// encodePackEntryHeader builds the variable-length header for a packed entry
+// of type ty and uncompressed length size.
+//
+// The first byte carries ty shifted left by four bits together with the low
+// four bits of size; the remaining size bits follow in 7-bit groups, least
+// significant first. Every byte except the last has its 0x80 bit set.
+// The result is a freshly allocated slice.
+func encodePackEntryHeader(ty objecttype.Type, size int64) []byte {
+	var scratch [16]byte
+
+	rest := uint64(size)
+	cur := byte((uint8(ty) << 4) | byte(rest&0x0f))
+	rest >>= 4
+
+	used := 0
+	for ; rest != 0; rest >>= 7 {
+		// More size bits remain: flag the pending byte as continued.
+		scratch[used] = cur | 0x80
+		used++
+		cur = byte(rest & 0x7f)
+	}
+	scratch[used] = cur
+	used++
+
+	return append([]byte(nil), scratch[:used]...)
+}