diff options
| author | 2026-03-05 18:24:40 +0800 | |
|---|---|---|
| committer | 2026-03-05 19:05:47 +0800 | |
| commit | 57f1818d547f2f1dca38033b4e29f62d89ef80f9 (patch) | |
| tree | 88d55ac38e2427860bf380c8cce42fcb3bb1e9ee /format/pack/ingest/resolve.go | |
| parent | internal/compress/zlib: Use flate's compression consumed counter (diff) | |
| signature | No signature | |
format/pack/ingest: Init
Diffstat (limited to 'format/pack/ingest/resolve.go')
| -rw-r--r-- | format/pack/ingest/resolve.go | 279 |
1 files changed, 279 insertions, 0 deletions
diff --git a/format/pack/ingest/resolve.go b/format/pack/ingest/resolve.go new file mode 100644 index 00000000..c6336d18 --- /dev/null +++ b/format/pack/ingest/resolve.go @@ -0,0 +1,279 @@ +package ingest + +import ( + "errors" + "fmt" + "io" + "slices" + + deltaapply "codeberg.org/lindenii/furgit/format/delta/apply" + packfmt "codeberg.org/lindenii/furgit/format/pack" + "codeberg.org/lindenii/furgit/internal/compress/zlib" + "codeberg.org/lindenii/furgit/objectheader" + "codeberg.org/lindenii/furgit/objectid" + "codeberg.org/lindenii/furgit/objecttype" +) + +var errExternalThinBase = errors.New("format/pack/ingest: external thin base required") + +// resolveAll resolves all delta records and finalizes ObjectID/RealType for every record. +func resolveAll(state *ingestState) error { + state.unresolvedRefDeltas = state.unresolvedRefDeltas[:0] + + for idx := range state.records { + if state.records[idx].resolved { + continue + } + + visiting := make(map[int]struct{}) + ty, content, err := resolveRecord(state, idx, visiting) + if err != nil { + if errors.Is(err, errExternalThinBase) { + state.unresolvedRefDeltas = append(state.unresolvedRefDeltas, idx) + continue + } + + return err + } + + id, err := hashCanonicalObject(state.algo, ty, content) + if err != nil { + return err + } + + record := &state.records[idx] + record.realType = ty + record.objectID = id + record.resolved = true + state.objectToRecord[id.String()] = idx + state.baseCache.add(idx, ty, content) + } + + return nil +} + +// resolveRecord resolves one record and returns canonical type/content. +func resolveRecord(state *ingestState, idx int, visiting map[int]struct{}) (objecttype.Type, []byte, error) { + if idx < 0 || idx >= len(state.records) { + return objecttype.TypeInvalid, nil, fmt.Errorf("format/pack/ingest: record index out of bounds") + } + + if _, ok := visiting[idx]; ok { + return objecttype.TypeInvalid, nil, &ErrDeltaCycle{Offset: state.records[idx].offset} + } + visiting[idx] = struct{}{} + defer delete(visiting, idx) + + record := &state.records[idx] + if ty, content, ok := state.baseCache.get(idx); ok { + return ty, content, nil + } + + if packfmt.IsBaseObjectType(record.packedType) { + ty, content, err := readBaseRecordContent(state, idx) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + if record.resolved { + state.baseCache.add(idx, record.realType, content) + + return record.realType, content, nil + } + + id, err := hashCanonicalObject(state.algo, ty, content) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + record.objectID = id + record.realType = ty + record.resolved = true + state.objectToRecord[id.String()] = idx + state.baseCache.add(idx, ty, content) + + return ty, content, nil + } + + var ( + baseType objecttype.Type + baseContent []byte + err error + ) + switch record.packedType { + case objecttype.TypeOfsDelta: + baseIdx, ok := state.offsetToRecord[record.baseOffset] + if !ok { + return objecttype.TypeInvalid, nil, &ErrMalformedPackEntry{ + Offset: record.offset, + Reason: "missing ofs-delta base entry", + } + } + baseType, baseContent, err = resolveRecord(state, baseIdx, visiting) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + case objecttype.TypeRefDelta: + baseIdx, ok := state.objectToRecord[record.baseObject.String()] + if ok { + baseType, baseContent, err = resolveRecord(state, baseIdx, visiting) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + } else { + return objecttype.TypeInvalid, nil, errExternalThinBase + } + default: + return objecttype.TypeInvalid, nil, &ErrMalformedPackEntry{ + Offset: record.offset, + Reason: "unsupported delta type", + } + } + + ty, content, err := applyDeltaRecord(state, idx, baseType, baseContent) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + + id, err := hashCanonicalObject(state.algo, ty, content) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + record.objectID = id + record.realType = ty + record.resolved = true + state.objectToRecord[id.String()] = idx + state.baseCache.add(idx, ty, content) + + return ty, content, nil +} + +// readBaseRecordContent reads canonical base content for one non-delta record. +func readBaseRecordContent(state *ingestState, idx int) (objecttype.Type, []byte, error) { + record := state.records[idx] + if !packfmt.IsBaseObjectType(record.packedType) { + return objecttype.TypeInvalid, nil, fmt.Errorf("format/pack/ingest: record %d is not a base object", idx) + } + + content, err := inflateRecordPayload(state, idx) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + if int64(len(content)) != record.declaredSize { + return objecttype.TypeInvalid, nil, &ErrMalformedPackEntry{ + Offset: record.offset, + Reason: fmt.Sprintf("base content size mismatch got %d want %d", len(content), record.declaredSize), + } + } + + return record.packedType, content, nil +} + +// applyDeltaRecord applies one delta record onto base content. +func applyDeltaRecord(state *ingestState, idx int, baseType objecttype.Type, baseContent []byte) (objecttype.Type, []byte, error) { + record := state.records[idx] + if record.packedType != objecttype.TypeOfsDelta && record.packedType != objecttype.TypeRefDelta { + return objecttype.TypeInvalid, nil, fmt.Errorf("format/pack/ingest: record %d is not a delta record", idx) + } + + deltaPayload, err := inflateRecordPayload(state, idx) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + if int64(len(deltaPayload)) != record.declaredSize { + return objecttype.TypeInvalid, nil, &ErrMalformedPackEntry{ + Offset: record.offset, + Reason: fmt.Sprintf("delta payload size mismatch got %d want %d", len(deltaPayload), record.declaredSize), + } + } + srcSize, dstSize, err := readDeltaHeaderSizes(deltaPayload) + if err != nil { + return objecttype.TypeInvalid, nil, &ErrMalformedPackEntry{ + Offset: record.offset, + Reason: fmt.Sprintf("read delta header: %v", err), + } + } + if srcSize != len(baseContent) { + return objecttype.TypeInvalid, nil, &ErrMalformedPackEntry{ + Offset: record.offset, + Reason: fmt.Sprintf("delta source size mismatch got %d want %d", srcSize, len(baseContent)), + } + } + + content, err := deltaapply.Apply(baseContent, deltaPayload) + if err != nil { + return objecttype.TypeInvalid, nil, &ErrMalformedPackEntry{ + Offset: record.offset, + Reason: fmt.Sprintf("apply delta: %v", err), + } + } + if len(content) != dstSize { + return objecttype.TypeInvalid, nil, &ErrMalformedPackEntry{ + Offset: record.offset, + Reason: fmt.Sprintf("delta result size mismatch got %d want %d", len(content), dstSize), + } + } + + return baseType, content, nil +} + +// inflateRecordPayload inflates one record's zlib payload from pack file. +func inflateRecordPayload(state *ingestState, idx int) ([]byte, error) { + record := state.records[idx] + if record.packedLen < uint64(record.headerLen) { + return nil, &ErrMalformedPackEntry{Offset: record.offset, Reason: "entry packed span underflow"} + } + compressedOffset := record.offset + uint64(record.headerLen) + compressedLen := record.packedLen - uint64(record.headerLen) + section := io.NewSectionReader(state.packFile, int64(compressedOffset), int64(compressedLen)) + + reader, err := zlib.NewReader(section) + if err != nil { + return nil, &ErrMalformedPackEntry{Offset: record.offset, Reason: fmt.Sprintf("open payload zlib: %v", err)} + } + defer func() { _ = reader.Close() }() + + out, err := io.ReadAll(reader) + if err != nil { + return nil, &ErrMalformedPackEntry{Offset: record.offset, Reason: fmt.Sprintf("inflate payload: %v", err)} + } + + return out, nil +} + +// hashCanonicalObject hashes canonical object bytes (header+content). +func hashCanonicalObject(algo objectid.Algorithm, ty objecttype.Type, content []byte) (objectid.ObjectID, error) { + header, ok := objectheader.Encode(ty, int64(len(content))) + if !ok { + return objectid.ObjectID{}, fmt.Errorf("format/pack/ingest: encode object header for type %d", ty) + } + + hashImpl, err := algo.New() + if err != nil { + return objectid.ObjectID{}, err + } + _, _ = hashImpl.Write(header) + _, _ = hashImpl.Write(content) + + return objectid.FromBytes(algo, hashImpl.Sum(nil)) +} + +// unresolvedThinBaseIDs returns sorted unique unresolved ref base IDs. +func unresolvedThinBaseIDs(state *ingestState) []objectid.ObjectID { + seen := make(map[string]objectid.ObjectID) + for _, idx := range state.unresolvedRefDeltas { + record := state.records[idx] + if record.packedType != objecttype.TypeRefDelta { + continue + } + seen[record.baseObject.String()] = record.baseObject + } + + out := make([]objectid.ObjectID, 0, len(seen)) + for _, id := range seen { + out = append(out, id) + } + slices.SortFunc(out, func(a, b objectid.ObjectID) int { + return slices.Compare(a.Bytes(), b.Bytes()) + }) + + return out +} |
