diff options
| author | 2026-06-11 08:01:10 +0000 | |
|---|---|---|
| committer | 2026-06-11 08:01:10 +0000 | |
| commit | 40cad947580b60ffb9ff003dbc4ef59bdd9cd28e (patch) | |
| tree | 6d9cc84882dd668e300d2bb7afcbf7742a811306 | |
| parent | internal/format/packfile: Refactor ofs delta distances (diff) | |
internal/format/packfile: Add entry header parsing
| -rw-r--r-- | internal/format/packfile/entry_header.go | 164 |
1 files changed, 164 insertions, 0 deletions
diff --git a/internal/format/packfile/entry_header.go b/internal/format/packfile/entry_header.go new file mode 100644 index 00000000..04d529d5 --- /dev/null +++ b/internal/format/packfile/entry_header.go @@ -0,0 +1,164 @@ +package packfile + +import ( + "errors" + "fmt" + + "lindenii.org/go/furgit/object/id" +) + +// ErrMalformedEntryHeader reports that +// a packfile entry header is truncated, overlong, +// has an unsupported entry type, +// or declares a size that overflows uint64. +var ErrMalformedEntryHeader = errors.New("internal/format/packfile: malformed entry header") + +// MaxTypeSizeLen is the maximum encoded length +// of the type/size prefix of an entry header. +// Every uint64 size is encodable within this bound, +// and [ParseEntryHeader] rejects longer prefixes. +const MaxTypeSizeLen = 10 + +// MaxEntryHeaderLen returns the maximum encoded length +// of a full entry header +// for packs whose object IDs are hashSize bytes. +// +// Callers parsing from a stream may buffer +// MaxEntryHeaderLen bytes +// (or fewer if the pack data ends sooner) +// and parse with [ParseEntryHeader]; +// no valid entry header is longer. +func MaxEntryHeaderLen(hashSize int) int { + return MaxTypeSizeLen + max(hashSize, MaxOfsDeltaDistanceLen) +} + +// EntryHeader is one parsed packfile entry header: +// everything from the start of the entry +// up to its zlib payload. +type EntryHeader struct { + // Type is the packfile entry type. + Type EntryType + + // Size is the declared inflated size + // of the entry's payload. + // For delta entries this is the delta size, + // not the reconstructed object size. + Size uint64 + + // HeaderLen is the number of bytes + // the header occupies in the pack; + // the zlib payload begins HeaderLen bytes + // after the start of the entry. + HeaderLen int + + // RefBase holds the base object ID + // for ref-delta entries. + // Only the first hashSize bytes are meaningful. + RefBase [id.MaxObjectIDSize]byte + + // OfsDistance is the backward distance + // from the start of this entry + // to the start of the base entry, + // for ofs-delta entries. + OfsDistance uint64 +} + +// ParseEntryHeader parses one packfile entry header +// from the beginning of data. +// +// hashSize must be the object ID size +// of the pack's object format. +// +// data need not contain the whole entry; +// [MaxEntryHeaderLen] bytes always suffice. +// Headers of types [EntryTypeInvalid] and [EntryTypeFuture] +// are rejected as malformed. +func ParseEntryHeader(data []byte, hashSize int) (EntryHeader, error) { + var zero EntryHeader + + if hashSize <= 0 || hashSize > id.MaxObjectIDSize { + return zero, fmt.Errorf("internal/format/packfile: invalid hash size %d", hashSize) + } + + if len(data) == 0 { + return zero, fmt.Errorf("%w: truncated type/size prefix", ErrMalformedEntryHeader) + } + + first := data[0] + header := EntryHeader{ + Type: EntryType((first >> 4) & 0x07), + Size: uint64(first & 0x0f), + HeaderLen: 1, + } + + shift := uint(4) + + b := first + for b&0x80 != 0 { + if header.HeaderLen >= MaxTypeSizeLen { + return zero, fmt.Errorf("%w: overlong type/size prefix", ErrMalformedEntryHeader) + } + + if header.HeaderLen >= len(data) { + return zero, fmt.Errorf("%w: truncated type/size prefix", ErrMalformedEntryHeader) + } + + b = data[header.HeaderLen] + header.HeaderLen++ + + group := uint64(b & 0x7f) + if group<<shift>>shift != group { + return zero, fmt.Errorf("%w: size overflows uint64", ErrMalformedEntryHeader) + } + + header.Size |= group << shift + shift += 7 + } + + switch header.Type { + case EntryTypeCommit, EntryTypeTree, EntryTypeBlob, EntryTypeTag: + // Base entries have nothing between the type/size prefix and the payload. + case EntryTypeRefDelta: + end := header.HeaderLen + hashSize + if end > len(data) { + return zero, fmt.Errorf("%w: truncated ref-delta base ID", ErrMalformedEntryHeader) + } + + copy(header.RefBase[:], data[header.HeaderLen:end]) + header.HeaderLen = end + case EntryTypeOfsDelta: + dist, consumed, err := ParseOfsDeltaDistance(data[header.HeaderLen:]) + if err != nil { + return zero, fmt.Errorf("%w: %w", ErrMalformedEntryHeader, err) + } + + header.OfsDistance = dist + header.HeaderLen += consumed + case EntryTypeInvalid, EntryTypeFuture: + return zero, fmt.Errorf("%w: unsupported entry type", ErrMalformedEntryHeader) + default: + return zero, fmt.Errorf("%w: unsupported entry type", ErrMalformedEntryHeader) + } + + return header, nil +} + +// AppendTypeSize appends the type/size prefix encoding +// of an entry header to dst. +// +// entryType must be a valid on-disk entry type; +// [EntryTypeInvalid] and [EntryTypeFuture] and +// values that do not fit in three bits +// produce garbage encodings. +func AppendTypeSize(dst []byte, entryType EntryType, size uint64) []byte { + b := byte(entryType)<<4 | byte(size&0x0f) + size >>= 4 + + for size != 0 { + dst = append(dst, b|0x80) + b = byte(size & 0x7f) + size >>= 7 + } + + return append(dst, b) +} |
