package ingest import ( "fmt" "io" "lindenii.org/go/furgit/internal/compress/zlib" "lindenii.org/go/furgit/internal/format/packfile" "lindenii.org/go/furgit/internal/format/packfile/delta" "lindenii.org/go/furgit/internal/progress" "lindenii.org/go/furgit/object/header" "lindenii.org/go/furgit/object/id" ) // adjacency maps each resolvable base to its delta children: // ofs-deltas keyed by base offset, ref-deltas keyed by base object ID. type adjacency struct { byOffset map[int][]int byOID map[id.ObjectID][]int } // resolveDeltas resolves every delta record into a final object ID and type, // completing thin packs from the external base reader when required. func (ingestion *ingestion) resolveDeltas() error { meter := progress.New(progress.Options{ Writer: ingestion.opts.Progress, Title: "resolving deltas", Total: ingestion.countDeltas(), Delay: 0, Sparse: false, Throughput: false, }) adjacency := ingestion.buildAdjacency() err := ingestion.resolveFrom(ingestion.resolvedRoots(), adjacency, meter) if err != nil { return err } external := ingestion.unresolvedExternalBases() switch { case len(external) == 0 && ingestion.countUnresolved() > 0: return fmt.Errorf("%w: unresolvable delta entries", ErrMalformedPack) case len(external) > 0: err = ingestion.fixThin(external, adjacency, meter) if err != nil { return err } } meter.Stop("done") return nil } // buildAdjacency indexes every delta record by its base, // so a resolved base can find the children that delta against it. func (ingestion *ingestion) buildAdjacency() adjacency { out := adjacency{ byOffset: make(map[int][]int), byOID: make(map[id.ObjectID][]int), } for index := range ingestion.records { rec := &ingestion.records[index] switch rec.packedType { case packfile.EntryTypeOfsDelta: out.byOffset[rec.baseOffset] = append(out.byOffset[rec.baseOffset], index) case packfile.EntryTypeRefDelta: out.byOID[rec.baseOID] = append(out.byOID[rec.baseOID], index) case packfile.EntryTypeInvalid, packfile.EntryTypeCommit, packfile.EntryTypeTree, packfile.EntryTypeBlob, packfile.EntryTypeTag, packfile.EntryTypeFuture: } } return out } // resolveFrom resolves the delta subtree rooted at each resolved record. func (ingestion *ingestion) resolveFrom(roots []int, adjacency adjacency, meter *progress.Meter) error { for _, root := range roots { content, err := ingestion.inflateRecord(root) if err != nil { return err } err = ingestion.resolveSubtree(root, content, ingestion.records[root].objectType, 0, adjacency, meter) if err != nil { return err } } return nil } // resolveSubtree resolves every delta child of one resolved record at depth, // holding the record's content as the base for its children. func (ingestion *ingestion) resolveSubtree( index int, content []byte, objectType packfile.EntryType, depth int, adjacency adjacency, meter *progress.Meter, ) error { rec := &ingestion.records[index] for _, child := range adjacency.byOffset[rec.offset] { err := ingestion.resolveChild(child, content, objectType, depth+1, adjacency, meter) if err != nil { return err } } for _, child := range adjacency.byOID[rec.oid] { err := ingestion.resolveChild(child, content, objectType, depth+1, adjacency, meter) if err != nil { return err } } return nil } // resolveChild applies one delta record at depth against its base content, // finalizes the record, and recurses into its own children. func (ingestion *ingestion) resolveChild( index int, baseContent []byte, baseType packfile.EntryType, depth int, adjacency adjacency, meter *progress.Meter, ) error { rec := &ingestion.records[index] if rec.resolved { return nil } if depth > delta.MaxChainDepth { return fmt.Errorf("%w: entry at %d: delta chain too deep", ErrMalformedPack, rec.offset) } deltaPayload, err := ingestion.inflateRecord(index) if err != nil { return err } baseSize, resultSize, _, err := delta.ParseHeaderSizes(deltaPayload) if err != nil { return fmt.Errorf("%w: entry at %d: %w", ErrMalformedPack, rec.offset, err) } if baseSize != uint64(len(baseContent)) { return fmt.Errorf("%w: entry at %d: delta base size mismatch", ErrMalformedPack, rec.offset) } content, err := delta.Apply(baseContent, deltaPayload) if err != nil { return fmt.Errorf("%w: entry at %d: %w", ErrMalformedPack, rec.offset, err) } if uint64(len(content)) != resultSize { return fmt.Errorf("%w: entry at %d: delta result size mismatch", ErrMalformedPack, rec.offset) } oid, err := ingestion.hashObject(baseType, content) if err != nil { return err } rec.objectType = baseType rec.oid = oid rec.resolved = true ingestion.byOID[oid] = index ingestion.deltasResolved++ meter.Set(ingestion.deltasResolved, 0) return ingestion.resolveSubtree(index, content, baseType, depth, adjacency, meter) } // inflateRecord inflates one record's payload from the temporary pack file. func (ingestion *ingestion) inflateRecord(index int) ([]byte, error) { rec := &ingestion.records[index] offset := int64(rec.dataOffset()) compressedLen := int64(rec.packedLen - rec.headerLen) size := rec.declaredSize zr, err := zlib.NewReader(io.NewSectionReader(ingestion.packFile, offset, compressedLen)) if err != nil { return nil, fmt.Errorf("%w: entry at %d: %w", ErrMalformedPack, rec.offset, err) } defer func() { _ = zr.Close() }() out := make([]byte, size) _, err = io.ReadFull(zr, out) if err != nil { return nil, fmt.Errorf("%w: entry at %d: %w", ErrMalformedPack, rec.offset, err) } return out, nil } // hashObject computes the object ID of one resolved object. func (ingestion *ingestion) hashObject(objectType packfile.EntryType, content []byte) (id.ObjectID, error) { var zero id.ObjectID ty, err := objectType.ObjectType() if err != nil { return zero, fmt.Errorf("object/store/packed/internal/ingest: %w", err) } hashImpl, err := ingestion.objectFormat.New() if err != nil { return zero, fmt.Errorf("object/store/packed/internal/ingest: %w", err) } _, _ = hashImpl.Write(header.Append(nil, ty, len(content))) _, _ = hashImpl.Write(content) oid, err := ingestion.objectFormat.FromBytes(hashImpl.Sum(nil)) if err != nil { return zero, fmt.Errorf("object/store/packed/internal/ingest: %w", err) } return oid, nil } // resolvedRoots returns the indices of every currently resolved record. func (ingestion *ingestion) resolvedRoots() []int { var roots []int for index := range ingestion.records { if ingestion.records[index].resolved { roots = append(roots, index) } } return roots } // countDeltas returns the number of delta records. func (ingestion *ingestion) countDeltas() int { return ingestion.deltaCount } // countUnresolved returns the number of records that remain unresolved. // // Every base is resolved during scanning or thin completion, // so the unresolved records are exactly the unresolved deltas: // the delta records minus those already resolved. func (ingestion *ingestion) countUnresolved() int { return ingestion.deltaCount - ingestion.deltasResolved } // unresolvedExternalBases returns the unique base object IDs // of unresolved ref-deltas whose base is not present in the pack, // in first-reference order. func (ingestion *ingestion) unresolvedExternalBases() []id.ObjectID { seen := make(map[id.ObjectID]struct{}) out := make([]id.ObjectID, 0, ingestion.deltaCount-ingestion.deltasResolved) for index := range ingestion.records { rec := &ingestion.records[index] if rec.resolved || rec.packedType != packfile.EntryTypeRefDelta { continue } if _, ok := ingestion.byOID[rec.baseOID]; ok { continue } if _, ok := seen[rec.baseOID]; ok { continue } seen[rec.baseOID] = struct{}{} out = append(out, rec.baseOID) } return out }