diff options
Diffstat (limited to 'format/pack/ingest')
| -rw-r--r-- | format/pack/ingest/finalize.go | 7 | ||||
| -rw-r--r-- | format/pack/ingest/idx_write.go | 14 | ||||
| -rw-r--r-- | format/pack/ingest/ingest.go | 8 | ||||
| -rw-r--r-- | format/pack/ingest/ingest_test.go | 25 | ||||
| -rw-r--r-- | format/pack/ingest/resolve.go | 17 | ||||
| -rw-r--r-- | format/pack/ingest/rev_write.go | 11 | ||||
| -rw-r--r-- | format/pack/ingest/stream.go | 14 | ||||
| -rw-r--r-- | format/pack/ingest/stream_scan.go | 22 | ||||
| -rw-r--r-- | format/pack/ingest/temp.go | 8 | ||||
| -rw-r--r-- | format/pack/ingest/thin_fix.go | 23 |
10 files changed, 149 insertions, 0 deletions
diff --git a/format/pack/ingest/finalize.go b/format/pack/ingest/finalize.go index 06b30102..86fef07c 100644 --- a/format/pack/ingest/finalize.go +++ b/format/pack/ingest/finalize.go @@ -12,6 +12,7 @@ func finalizeArtifacts(state *ingestState) (Result, error) { base := "pack-" + state.packHash.String() packFinal := base + ".pack" idxFinal := base + ".idx" + revFinal := "" if state.writeRev { revFinal = base + ".rev" @@ -20,9 +21,11 @@ func finalizeArtifacts(state *ingestState) (Result, error) { if err := linkTempToFinal(state, state.packTmpName, packFinal); err != nil { return Result{}, err } + if err := linkTempToFinal(state, state.idxTmpName, idxFinal); err != nil { return Result{}, err } + if state.writeRev { if err := linkTempToFinal(state, state.revTmpName, revFinal); err != nil { return Result{}, err @@ -44,9 +47,11 @@ func rollbackTemporaryArtifacts(state *ingestState) { if state.packTmpName != "" { _ = state.destination.Remove(state.packTmpName) } + if state.idxTmpName != "" { _ = state.destination.Remove(state.idxTmpName) } + if state.revTmpName != "" { _ = state.destination.Remove(state.revTmpName) } @@ -57,6 +62,7 @@ func linkTempToFinal(state *ingestState, tmp, final string) error { if tmp == "" || final == "" { return fmt.Errorf("format/pack/ingest: invalid finalize names tmp=%q final=%q", tmp, final) } + if strings.Contains(final, "/") { return fmt.Errorf("format/pack/ingest: final name must be leaf: %q", final) } @@ -67,6 +73,7 @@ func linkTempToFinal(state *ingestState, tmp, final string) error { return nil } + if errors.Is(err, fs.ErrExist) { _ = state.destination.Remove(tmp) diff --git a/format/pack/ingest/idx_write.go b/format/pack/ingest/idx_write.go index 1e5f20c4..f3d63b76 100644 --- a/format/pack/ingest/idx_write.go +++ b/format/pack/ingest/idx_write.go @@ -17,6 +17,7 @@ const ( // writeIdx writes idx v2 for resolved records. func writeIdx(state *ingestState) error { order := buildIdxOrder(state) + hashImpl, err := state.algo.New() if err != nil { return err @@ -26,6 +27,7 @@ func writeIdx(state *ingestState) error { if _, err := state.idxFile.Write(src); err != nil { return err } + if _, err := hashImpl.Write(src); err != nil { return err } @@ -36,19 +38,23 @@ func writeIdx(state *ingestState) error { var scratch [8]byte binary.BigEndian.PutUint32(scratch[:4], idxMagicV2) binary.BigEndian.PutUint32(scratch[4:8], idxVersionV2) + if err := write(scratch[:8]); err != nil { return err } var fanout [256]uint32 + for _, recordIdx := range order { idRaw := state.records[recordIdx].objectID.Bytes() fanout[idRaw[0]]++ } + var cumulative uint32 for i := range fanout { cumulative += fanout[i] binary.BigEndian.PutUint32(scratch[:4], cumulative) + if err := write(scratch[:4]); err != nil { return err } @@ -63,27 +69,33 @@ func writeIdx(state *ingestState) error { for _, recordIdx := range order { binary.BigEndian.PutUint32(scratch[:4], state.records[recordIdx].crc32) + if err := write(scratch[:4]); err != nil { return err } } largeOffsets := make([]uint64, 0) + for _, recordIdx := range order { offset := state.records[recordIdx].offset if offset >= 0x80000000 { word := 0x80000000 | uint32(len(largeOffsets)) largeOffsets = append(largeOffsets, offset) + binary.BigEndian.PutUint32(scratch[:4], word) } else { binary.BigEndian.PutUint32(scratch[:4], uint32(offset)) } + if err := write(scratch[:4]); err != nil { return err } } + for _, off := range largeOffsets { binary.BigEndian.PutUint64(scratch[:8], off) + if err := write(scratch[:8]); err != nil { return err } @@ -107,6 +119,7 @@ func buildIdxOrder(state *ingestState) []int { for idx := range state.records { out = append(out, idx) } + slices.SortFunc(out, func(a, b int) int { return bytes.Compare(state.records[a].objectID.Bytes(), state.records[b].objectID.Bytes()) }) @@ -130,6 +143,7 @@ func writeAndHash(dst io.Writer, hashImpl hash.Hash, src []byte) error { if _, err := dst.Write(src); err != nil { return err } + if _, err := hashImpl.Write(src); err != nil { return err } diff --git a/format/pack/ingest/ingest.go b/format/pack/ingest/ingest.go index 5d6bbfce..4ae7ff2c 100644 --- a/format/pack/ingest/ingest.go +++ b/format/pack/ingest/ingest.go @@ -7,6 +7,7 @@ func ingest(state *ingestState) (out Result, err error) { if err := openTemporaryArtifacts(state); err != nil { return Result{}, err } + defer func() { _ = closeTemporaryArtifacts(state) if err != nil { @@ -17,20 +18,25 @@ func ingest(state *ingestState) (out Result, err error) { if err := streamPackAndScan(state); err != nil { return Result{}, err } + if err := resolveAll(state); err != nil { return Result{}, err } + if err := maybeFixThin(state); err != nil { return Result{}, err } + if state.thinFixed { if err := resolveAll(state); err != nil { return Result{}, err } } + if len(state.unresolvedRefDeltas) > 0 { return Result{}, &ErrThinPackUnresolved{Count: len(state.unresolvedRefDeltas)} } + if err := verifyResolvedRecords(state); err != nil { return Result{}, err } @@ -38,9 +44,11 @@ func ingest(state *ingestState) (out Result, err error) { if err := state.packFile.Sync(); err != nil { return Result{}, &ErrDestinationWrite{Op: fmt.Sprintf("sync pack: %v", err)} } + if err := writeIdx(state); err != nil { return Result{}, err } + if err := writeRev(state); err != nil { return Result{}, err } diff --git a/format/pack/ingest/ingest_test.go b/format/pack/ingest/ingest_test.go index 5526d839..35f19b43 100644 --- a/format/pack/ingest/ingest_test.go +++ b/format/pack/ingest/ingest_test.go @@ -42,6 +42,7 @@ func fixtureBytes(t *testing.T, algo objectid.Algorithm, name string) []byte { t.Helper() path := fixturePath(t, algo, name) + data, err := os.ReadFile(path) if err != nil { t.Fatalf("read fixture %q: %v", path, err) @@ -55,16 +56,19 @@ func fixtureMetadata(t *testing.T, algo objectid.Algorithm) map[string]string { t.Helper() data := fixtureBytes(t, algo, "METADATA.txt") + out := make(map[string]string) for line := range strings.SplitSeq(strings.TrimSpace(string(data)), "\n") { line = strings.TrimSpace(line) if line == "" { continue } + key, value, ok := strings.Cut(line, "=") if !ok { t.Fatalf("invalid fixture metadata line %q", line) } + out[strings.TrimSpace(key)] = strings.TrimSpace(value) } @@ -76,6 +80,7 @@ func fixtureOID(t *testing.T, algo objectid.Algorithm, key string) objectid.Obje t.Helper() meta := fixtureMetadata(t, algo) + hex, ok := meta[key] if !ok { t.Fatalf("missing fixture metadata key %q", key) @@ -103,10 +108,12 @@ func verifyReindexOracle(t *testing.T, repo *testgit.TestRepo, packPath, idxPath if err != nil { t.Fatalf("read idx: %v", err) } + wantIdx, err := os.ReadFile(oracleIdxPath) if err != nil { t.Fatalf("read oracle idx: %v", err) } + if !bytes.Equal(gotIdx, wantIdx) { t.Fatal("idx bytes differ from git index-pack output") } @@ -115,10 +122,12 @@ func verifyReindexOracle(t *testing.T, repo *testgit.TestRepo, packPath, idxPath if err != nil { t.Fatalf("read rev: %v", err) } + wantRev, err := os.ReadFile(oracleRevPath) if err != nil { t.Fatalf("read oracle rev: %v", err) } + if !bytes.Equal(gotRev, wantRev) { t.Fatal("rev bytes differ from git index-pack output") } @@ -132,10 +141,12 @@ func TestIngestNonThinPackWritesPackIdxRev(t *testing.T) { packBytes := fixtureBytes(t, algo, "nonthin.pack") receiver := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true}) + packRoot, err := os.OpenRoot(filepath.Join(receiver.Dir(), "objects", "pack")) if err != nil { t.Fatalf("open pack root: %v", err) } + defer func() { err = packRoot.Close() if err != nil { @@ -147,9 +158,11 @@ func TestIngestNonThinPackWritesPackIdxRev(t *testing.T) { if err != nil { t.Fatalf("Ingest: %v", err) } + if result.ThinFixed { t.Fatalf("ThinFixed = true, want false") } + if result.RevName == "" { t.Fatal("RevName is empty") } @@ -158,10 +171,12 @@ func TestIngestNonThinPackWritesPackIdxRev(t *testing.T) { if err != nil { t.Fatalf("stat pack: %v", err) } + _, err = packRoot.Stat(result.IdxName) if err != nil { t.Fatalf("stat idx: %v", err) } + _, err = packRoot.Stat(result.RevName) if err != nil { t.Fatalf("stat rev: %v", err) @@ -186,10 +201,12 @@ func TestIngestThinPackWithoutFixReturnsUnresolved(t *testing.T) { receiver := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true}) packDir := filepath.Join(receiver.Dir(), "objects", "pack") + packRoot, err := os.OpenRoot(packDir) if err != nil { t.Fatalf("open pack root: %v", err) } + defer func() { err = packRoot.Close() if err != nil { @@ -211,6 +228,7 @@ func TestIngestThinPackWithoutFixReturnsUnresolved(t *testing.T) { if err != nil { t.Fatalf("glob pack files: %v", err) } + if len(matches) != 0 { t.Fatalf("found finalized pack files after failure: %v", matches) } @@ -230,6 +248,7 @@ func TestIngestThinPackWithFixThin(t *testing.T) { if err != nil { t.Fatalf("open pack root: %v", err) } + defer func() { err = packRoot.Close() if err != nil { @@ -246,6 +265,7 @@ func TestIngestThinPackWithFixThin(t *testing.T) { if err != nil { t.Fatalf("open receiver root: %v", err) } + defer func() { err = receiverRoot.Close() if err != nil { @@ -257,6 +277,7 @@ func TestIngestThinPackWithFixThin(t *testing.T) { if err != nil { t.Fatalf("repository.Open(receiver): %v", err) } + defer func() { err = receiverRepo.Close() if err != nil { @@ -268,6 +289,7 @@ func TestIngestThinPackWithFixThin(t *testing.T) { if err != nil { t.Fatalf("Ingest(thin): %v", err) } + if !result.ThinFixed { t.Fatal("ThinFixed = false, want true") } @@ -295,10 +317,12 @@ func TestIngestPackTrailerMismatch(t *testing.T) { receiver := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true}) packDir := filepath.Join(receiver.Dir(), "objects", "pack") + packRoot, err := os.OpenRoot(packDir) if err != nil { t.Fatalf("open pack root: %v", err) } + defer func() { err = packRoot.Close() if err != nil { @@ -320,6 +344,7 @@ func TestIngestPackTrailerMismatch(t *testing.T) { if err != nil { t.Fatalf("glob pack files: %v", err) } + if len(matches) != 0 { t.Fatalf("found finalized pack files after failure: %v", matches) } diff --git a/format/pack/ingest/resolve.go b/format/pack/ingest/resolve.go index 6058a275..f5dfb28a 100644 --- a/format/pack/ingest/resolve.go +++ b/format/pack/ingest/resolve.go @@ -27,6 +27,7 @@ func resolveAll(state *ingestState) error { } visiting := make(map[int]struct{}) + ty, content, err := resolveRecord(state, idx, visiting) if err != nil { if errors.Is(err, errExternalThinBase) { @@ -62,6 +63,7 @@ func resolveRecord(state *ingestState, idx int, visiting map[int]struct{}) (obje if _, ok := visiting[idx]; ok { return objecttype.TypeInvalid, nil, &ErrDeltaCycle{Offset: state.records[idx].offset} } + visiting[idx] = struct{}{} defer delete(visiting, idx) @@ -75,6 +77,7 @@ func resolveRecord(state *ingestState, idx int, visiting map[int]struct{}) (obje if err != nil { return objecttype.TypeInvalid, nil, err } + if record.resolved { state.baseCache.add(idx, record.realType, content) @@ -85,6 +88,7 @@ func resolveRecord(state *ingestState, idx int, visiting map[int]struct{}) (obje if err != nil { return objecttype.TypeInvalid, nil, err } + record.objectID = id record.realType = ty record.resolved = true @@ -108,6 +112,7 @@ func resolveRecord(state *ingestState, idx int, visiting map[int]struct{}) (obje Reason: "missing ofs-delta base entry", } } + baseType, baseContent, err = resolveRecord(state, baseIdx, visiting) if err != nil { return objecttype.TypeInvalid, nil, err @@ -138,6 +143,7 @@ func resolveRecord(state *ingestState, idx int, visiting map[int]struct{}) (obje if err != nil { return objecttype.TypeInvalid, nil, err } + record.objectID = id record.realType = ty record.resolved = true @@ -158,6 +164,7 @@ func readBaseRecordContent(state *ingestState, idx int) (objecttype.Type, []byte if err != nil { return objecttype.TypeInvalid, nil, err } + if int64(len(content)) != record.declaredSize { return objecttype.TypeInvalid, nil, &ErrMalformedPackEntry{ Offset: record.offset, @@ -179,12 +186,14 @@ func applyDeltaRecord(state *ingestState, idx int, baseType objecttype.Type, bas if err != nil { return objecttype.TypeInvalid, nil, err } + if int64(len(deltaPayload)) != record.declaredSize { return objecttype.TypeInvalid, nil, &ErrMalformedPackEntry{ Offset: record.offset, Reason: fmt.Sprintf("delta payload size mismatch got %d want %d", len(deltaPayload), record.declaredSize), } } + srcSize, dstSize, err := readDeltaHeaderSizes(deltaPayload) if err != nil { return objecttype.TypeInvalid, nil, &ErrMalformedPackEntry{ @@ -192,6 +201,7 @@ func applyDeltaRecord(state *ingestState, idx int, baseType objecttype.Type, bas Reason: fmt.Sprintf("read delta header: %v", err), } } + if srcSize != len(baseContent) { return objecttype.TypeInvalid, nil, &ErrMalformedPackEntry{ Offset: record.offset, @@ -206,6 +216,7 @@ func applyDeltaRecord(state *ingestState, idx int, baseType objecttype.Type, bas Reason: fmt.Sprintf("apply delta: %v", err), } } + if len(content) != dstSize { return objecttype.TypeInvalid, nil, &ErrMalformedPackEntry{ Offset: record.offset, @@ -222,6 +233,7 @@ func inflateRecordPayload(state *ingestState, idx int) ([]byte, error) { if record.packedLen < uint64(record.headerLen) { return nil, &ErrMalformedPackEntry{Offset: record.offset, Reason: "entry packed span underflow"} } + compressedOffset := record.offset + uint64(record.headerLen) compressedLen := record.packedLen - uint64(record.headerLen) section := io.NewSectionReader(state.packFile, int64(compressedOffset), int64(compressedLen)) @@ -230,6 +242,7 @@ func inflateRecordPayload(state *ingestState, idx int) ([]byte, error) { if err != nil { return nil, &ErrMalformedPackEntry{Offset: record.offset, Reason: fmt.Sprintf("open payload zlib: %v", err)} } + defer func() { _ = reader.Close() }() out, err := io.ReadAll(reader) @@ -251,6 +264,7 @@ func hashCanonicalObject(algo objectid.Algorithm, ty objecttype.Type, content [] if err != nil { return objectid.ObjectID{}, err } + _, _ = hashImpl.Write(header) _, _ = hashImpl.Write(content) @@ -260,11 +274,13 @@ func hashCanonicalObject(algo objectid.Algorithm, ty objecttype.Type, content [] // unresolvedThinBaseIDs returns sorted unique unresolved ref base IDs. func unresolvedThinBaseIDs(state *ingestState) []objectid.ObjectID { seen := make(map[objectid.ObjectID]struct{}) + for _, idx := range state.unresolvedRefDeltas { record := state.records[idx] if record.packedType != objecttype.TypeRefDelta { continue } + seen[record.baseObject] = struct{}{} } @@ -272,6 +288,7 @@ func unresolvedThinBaseIDs(state *ingestState) []objectid.ObjectID { for id := range seen { out = append(out, id) } + slices.SortFunc(out, func(a, b objectid.ObjectID) int { return bytes.Compare(a.RawBytes(), b.RawBytes()) }) diff --git a/format/pack/ingest/rev_write.go b/format/pack/ingest/rev_write.go index cf95c782..5e9dbcbd 100644 --- a/format/pack/ingest/rev_write.go +++ b/format/pack/ingest/rev_write.go @@ -19,10 +19,12 @@ func writeRev(state *ingestState) error { } idxOrder := buildIdxOrder(state) + recordToIdxPos := make([]int, len(state.records)) for pos, recordIdx := range idxOrder { recordToIdxPos[recordIdx] = pos } + packOrder := buildPackOrder(state) hashImpl, err := state.algo.New() @@ -32,20 +34,26 @@ func writeRev(state *ingestState) error { var scratch [8]byte binary.BigEndian.PutUint32(scratch[:4], revMagic) + if err := writeAndHash(state.revFile, hashImpl, scratch[:4]); err != nil { return err } + binary.BigEndian.PutUint32(scratch[:4], revVersion) + if err := writeAndHash(state.revFile, hashImpl, scratch[:4]); err != nil { return err } + binary.BigEndian.PutUint32(scratch[:4], hashID(state.algo)) + if err := writeAndHash(state.revFile, hashImpl, scratch[:4]); err != nil { return err } for _, recordIdx := range packOrder { binary.BigEndian.PutUint32(scratch[:4], uint32(recordToIdxPos[recordIdx])) + if err := writeAndHash(state.revFile, hashImpl, scratch[:4]); err != nil { return err } @@ -54,6 +62,7 @@ func writeRev(state *ingestState) error { if err := writeAndHash(state.revFile, hashImpl, state.packHash.Bytes()); err != nil { return err } + revHash := hashImpl.Sum(nil) if _, err := state.revFile.Write(revHash); err != nil { return err @@ -68,8 +77,10 @@ func buildPackOrder(state *ingestState) []int { for idx := range state.records { out = append(out, idx) } + slices.SortFunc(out, func(a, b int) int { offA := state.records[a].offset + offB := state.records[b].offset switch { case offA < offB: diff --git a/format/pack/ingest/stream.go b/format/pack/ingest/stream.go index 61f6079b..78302da2 100644 --- a/format/pack/ingest/stream.go +++ b/format/pack/ingest/stream.go @@ -54,6 +54,7 @@ func (scanner *streamScanner) fill(min int) error { if min <= 0 { return nil } + if min > len(scanner.buf) { return fmt.Errorf("format/pack/ingest: fill(%d) exceeds scanner buffer", min) } @@ -67,6 +68,7 @@ func (scanner *streamScanner) fill(min int) error { if readN > 0 { scanner.n += readN } + if err != nil { if err == io.EOF && scanner.n-scanner.off >= min { return nil @@ -74,6 +76,7 @@ func (scanner *streamScanner) fill(min int) error { return err } + if readN == 0 { return io.ErrNoProgress } @@ -87,6 +90,7 @@ func (scanner *streamScanner) use(n int) error { if n < 0 || n > scanner.n-scanner.off { return fmt.Errorf("format/pack/ingest: invalid consume length %d", n) } + if n == 0 { return nil } @@ -97,6 +101,7 @@ func (scanner *streamScanner) use(n int) error { return err } } + if scanner.inEntryCRC { scanner.entryCRC = crc32.Update(scanner.entryCRC, crc32.IEEETable, chunk) } @@ -132,7 +137,9 @@ func (scanner *streamScanner) Read(dst []byte) (int, error) { if n > unread { n = unread } + copy(dst, scanner.buf[scanner.off:scanner.off+n]) + if err := scanner.use(n); err != nil { return 0, err } @@ -179,17 +186,21 @@ func (scanner *streamScanner) finishAndFlushTrailer() error { } trailer := make([]byte, scanner.hashSize) + scanner.hashEnabled = false if err := scanner.readFull(trailer); err != nil { return &ErrPackTrailerMismatch{} } + scanner.packTrailer = append(scanner.packTrailer[:0], trailer...) var probe [1]byte + n, err := scanner.Read(probe[:]) if n > 0 || err == nil { return fmt.Errorf("format/pack/ingest: pack has trailing garbage") } + if err != io.EOF { return err } @@ -213,6 +224,7 @@ func (scanner *streamScanner) endEntryCRC() (uint32, error) { if !scanner.inEntryCRC { return 0, fmt.Errorf("format/pack/ingest: entry CRC not started") } + crc := scanner.entryCRC scanner.entryCRC = 0 scanner.inEntryCRC = false @@ -233,9 +245,11 @@ func (scanner *streamScanner) flushConsumedPrefix() error { if err != nil { return &ErrDestinationWrite{Op: fmt.Sprintf("write pack: %v", err)} } + if n == 0 { return &ErrDestinationWrite{Op: "write pack: short write"} } + written += n } diff --git a/format/pack/ingest/stream_scan.go b/format/pack/ingest/stream_scan.go index e80e9a3e..8f429e43 100644 --- a/format/pack/ingest/stream_scan.go +++ b/format/pack/ingest/stream_scan.go @@ -49,13 +49,16 @@ func streamPackAndScan(state *ingestState) error { if err := state.stream.finishAndFlushTrailer(); err != nil { return err } + if len(state.stream.packTrailer) != state.algo.Size() { return fmt.Errorf("format/pack/ingest: invalid trailer size") } + packHash, err := objectid.FromBytes(state.algo, state.stream.packTrailer) if err != nil { return err } + state.packHash = packHash return state.stream.flush() @@ -115,6 +118,7 @@ func scanOneEntry(state *ingestState, startOffset uint64) (uint64, error) { } record.packedLen = endOffset - startOffset + record.dataOffset = startOffset + uint64(record.headerLen) if record.packedLen < uint64(record.headerLen) { return 0, &ErrMalformedPackEntry{Offset: startOffset, Reason: "negative payload span"} @@ -124,6 +128,7 @@ func scanOneEntry(state *ingestState, startOffset uint64) (uint64, error) { if err != nil { return 0, err } + record.crc32 = crc if packfmt.IsBaseObjectType(record.packedType) { @@ -134,6 +139,7 @@ func scanOneEntry(state *ingestState, startOffset uint64) (uint64, error) { recordIdx := len(state.records) state.records = append(state.records, record) + state.offsetToRecord[record.offset] = recordIdx if record.resolved { state.objectToRecord[record.objectID] = recordIdx @@ -158,6 +164,7 @@ func scanOneEntry(state *ingestState, startOffset uint64) (uint64, error) { // parseEntryPrefix parses one entry prefix from stream. func parseEntryPrefix(state *ingestState, startOffset uint64) (objectRecord, error) { var record objectRecord + record.offset = startOffset first, err := state.stream.ReadByte() @@ -176,13 +183,16 @@ func parseEntryPrefix(state *ingestState, startOffset uint64) (objectRecord, err if err != nil { return record, &ErrMalformedPackEntry{Offset: startOffset, Reason: fmt.Sprintf("read size continuation: %v", err)} } + headerLen++ size |= int64(b&0x7f) << shift shift += 7 } + if size < 0 { return record, &ErrMalformedPackEntry{Offset: startOffset, Reason: "negative declared size"} } + record.declaredSize = size switch record.packedType { @@ -192,10 +202,12 @@ func parseEntryPrefix(state *ingestState, startOffset uint64) (objectRecord, err if err := state.stream.readFull(baseRaw); err != nil { return record, &ErrMalformedPackEntry{Offset: startOffset, Reason: fmt.Sprintf("read ref base: %v", err)} } + baseID, err := objectid.FromBytes(state.algo, baseRaw) if err != nil { return record, &ErrMalformedPackEntry{Offset: startOffset, Reason: fmt.Sprintf("parse ref base: %v", err)} } + record.baseObject = baseID headerLen += uint32(len(baseRaw)) case objecttype.TypeOfsDelta: @@ -203,9 +215,11 @@ func parseEntryPrefix(state *ingestState, startOffset uint64) (objectRecord, err if err != nil { return record, &ErrMalformedPackEntry{Offset: startOffset, Reason: err.Error()} } + if startOffset <= dist { return record, &ErrMalformedPackEntry{Offset: startOffset, Reason: "ofs base offset out of bounds"} } + record.baseOffset = startOffset - dist headerLen += uint32(consumed) case objecttype.TypeInvalid, objecttype.TypeFuture: @@ -223,13 +237,16 @@ func parseEntryPrefix(state *ingestState, startOffset uint64) (objectRecord, err // (inflatedLength, consumedInput, oidForBaseEntry). func drainEntryPayload(state *ingestState, record objectRecord) (int64, uint64, objectid.ObjectID, error) { var zero objectid.ObjectID + reader, err := zlib.NewReader(state.stream) if err != nil { return 0, 0, zero, &ErrMalformedPackEntry{Offset: record.offset, Reason: fmt.Sprintf("open zlib stream: %v", err)} } + defer func() { _ = reader.Close() }() var total int64 + if packfmt.IsBaseObjectType(record.packedType) { header, ok := objectheader.Encode(record.packedType, record.declaredSize) if !ok { @@ -240,12 +257,14 @@ func drainEntryPayload(state *ingestState, record objectRecord) (int64, uint64, if err != nil { return 0, 0, zero, err } + _, _ = hashImpl.Write(header) n, err := io.Copy(hashImpl, reader) if err != nil { return 0, 0, zero, &ErrMalformedPackEntry{Offset: record.offset, Reason: fmt.Sprintf("inflate base object: %v", err)} } + total = n oid, err := objectid.FromBytes(state.algo, hashImpl.Sum(nil)) @@ -261,6 +280,7 @@ func drainEntryPayload(state *ingestState, record objectRecord) (int64, uint64, if err != nil { return 0, 0, zero, &ErrMalformedPackEntry{Offset: record.offset, Reason: fmt.Sprintf("inflate delta payload: %v", err)} } + total = n return total, reader.InputConsumed(), zero, nil @@ -278,12 +298,14 @@ func readOfsDistanceFromStream(reader io.ByteReader) (uint64, int, error) { dist := uint64(first & 0x7f) consumed := 1 + b := first for b&0x80 != 0 { b, err = reader.ReadByte() if err != nil { return 0, 0, fmt.Errorf("read ofs distance continuation: %w", err) } + consumed++ dist = ((dist + 1) << 7) + uint64(b&0x7f) } diff --git a/format/pack/ingest/temp.go b/format/pack/ingest/temp.go index b8c947ec..d0297ace 100644 --- a/format/pack/ingest/temp.go +++ b/format/pack/ingest/temp.go @@ -24,6 +24,7 @@ func openTemporaryArtifacts(state *ingestState) error { } revName := "" + var revFile *os.File if state.writeRev { revName, revFile, err = createTempFile(state.destination, "tmp_rev_") @@ -54,18 +55,23 @@ func closeTemporaryArtifacts(state *ingestState) error { if err := state.packFile.Close(); err != nil && out == nil { out = err } + state.packFile = nil } + if state.idxFile != nil { if err := state.idxFile.Close(); err != nil && out == nil { out = err } + state.idxFile = nil } + if state.revFile != nil { if err := state.revFile.Close(); err != nil && out == nil { out = err } + state.revFile = nil } @@ -76,10 +82,12 @@ func closeTemporaryArtifacts(state *ingestState) error { func createTempFile(root *os.Root, prefix string) (string, *os.File, error) { for range 32 { name := prefix + rand.Text() + file, err := root.OpenFile(name, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0o644) if err == nil { return name, file, nil } + if errors.Is(err, fs.ErrExist) { continue } diff --git a/format/pack/ingest/thin_fix.go b/format/pack/ingest/thin_fix.go index 249fe136..15fe7674 100644 --- a/format/pack/ingest/thin_fix.go +++ b/format/pack/ingest/thin_fix.go @@ -17,26 +17,32 @@ func maybeFixThin(state *ingestState) error { if len(state.unresolvedRefDeltas) == 0 { return nil } + if !state.fixThin { return &ErrThinPackUnresolved{Count: len(state.unresolvedRefDeltas)} } + if state.base == nil { return &ErrThinPackUnresolved{Count: len(state.unresolvedRefDeltas)} } hashSize := int64(state.algo.Size()) + info, err := state.packFile.Stat() if err != nil { return err } + size := info.Size() if size < hashSize { return fmt.Errorf("format/pack/ingest: pack too short to trim trailer") } + newEnd := size - hashSize if err := state.packFile.Truncate(newEnd); err != nil { return err } + state.stream.consumed = uint64(newEnd) baseIDs := unresolvedThinBaseIDs(state) @@ -45,9 +51,11 @@ func maybeFixThin(state *ingestState) error { if err != nil { continue } + if _, err := appendBaseObject(state, id, ty, content); err != nil { return err } + state.thinFixed = true } @@ -61,6 +69,7 @@ func maybeFixThin(state *ingestState) error { // appendBaseObject appends one base object as a new packed non-delta entry. func appendBaseObject(state *ingestState, id objectid.ObjectID, realType objecttype.Type, content []byte) (int, error) { start := state.stream.consumed + header := encodePackEntryHeader(realType, int64(len(content))) if _, err := state.packFile.WriteAt(header, int64(start)); err != nil { return 0, err @@ -70,10 +79,12 @@ func appendBaseObject(state *ingestState, id objectid.ObjectID, realType objectt crc := crc32.NewIEEE() _, _ = crc.Write(header) counting := &countingWriter{dst: section} + zw := zlib.NewWriter(io.MultiWriter(counting, crc)) if _, err := zw.Write(content); err != nil { return 0, err } + if err := zw.Close(); err != nil { return 0, err } @@ -116,6 +127,7 @@ func (writer *fileSectionWriter) Write(src []byte) (int, error) { if len(src) == 0 { return 0, nil } + n, err := writer.file.WriteAt(src, writer.off+writer.pos) writer.pos += int64(n) @@ -140,6 +152,7 @@ func (writer *countingWriter) Write(src []byte) (int, error) { func rewritePackHeaderAndTrailer(state *ingestState) error { var countRaw [4]byte binary.BigEndian.PutUint32(countRaw[:], uint32(len(state.records))) + if _, err := state.packFile.WriteAt(countRaw[:], 8); err != nil { return err } @@ -148,29 +161,35 @@ func rewritePackHeaderAndTrailer(state *ingestState) error { if err != nil { return err } + endWithoutTrailer := info.Size() hashImpl, err := state.algo.New() if err != nil { return err } + var ( buf [128 << 10]byte pos int64 ) for pos < endWithoutTrailer { want := int64(len(buf)) + remaining := endWithoutTrailer - pos if remaining < want { want = remaining } + n, err := state.packFile.ReadAt(buf[:want], pos) if err != nil && err != io.EOF { return err } + if n == 0 { return io.ErrUnexpectedEOF } + _, _ = hashImpl.Write(buf[:n]) pos += int64(n) } @@ -184,6 +203,7 @@ func rewritePackHeaderAndTrailer(state *ingestState) error { if err != nil { return err } + state.packHash = packHash state.objectCountHeader = uint32(len(state.records)) state.stream.consumed = uint64(endWithoutTrailer + int64(len(sum))) @@ -194,9 +214,11 @@ func rewritePackHeaderAndTrailer(state *ingestState) error { // encodePackEntryHeader encodes one non-delta packed entry header. func encodePackEntryHeader(ty objecttype.Type, size int64) []byte { var out [16]byte + n := 0 s := uint64(size) c := byte((uint8(ty) << 4) | byte(s&0x0f)) + s >>= 4 for s != 0 { out[n] = c | 0x80 @@ -204,6 +226,7 @@ func encodePackEntryHeader(ty objecttype.Type, size int64) []byte { c = byte(s & 0x7f) s >>= 7 } + out[n] = c n++ |
