diff options
228 files changed, 5008 insertions, 12512 deletions
diff --git a/.golangci.yaml b/.golangci.yaml index 6b1a2d04..ac944be6 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -10,6 +10,10 @@ linters: - path: internal/testgit linters: - err113 # test helpers report ad-hoc git failures; static errors add no value here + - path: cmd/ + linters: + - err113 + - wrapcheck disable: - dupword # extremely normal in tests and a pretty unnecessary linter - goconst # unnecessary especially for our parsing code; many false positives @@ -27,6 +31,7 @@ linters: - dogsled # definitely not an issue, ignoring returns is normal - gomodguard # deprecated - exhaustruct # currently broken, will turn it on again when v4 gets into golangci-lint + - ireturn # much more noise than info settings: perfsprint: @@ -49,6 +54,7 @@ linters: - $gostd - lindenii.org/go/furgit - lindenii.org/go/lgo + - github.com/klauspost/compress - golang.org/x revive: rules: @@ -20,12 +20,6 @@ network/protocol/v0v1/server/receivepack network/receivepack network/receivepack/hooks network/receivepack/service -object/store/dual -object/store/loose -object/store/packed -object/store/packed/internal -object/store/packed/internal/ingest -object/store/packed/internal/reading reachability ref/store ref/store/chain @@ -1,7 +1,7 @@ * The append functions are not transactional. Or perhaps we should make them transactional? -* Perhaps no GIGO in serialization +* Perhaps no GIGO in serialization * Too strict about tagger? @@ -12,20 +12,8 @@ * Check error wrapping in object/fetch * Pack ingestion - * v2 resolver: explicit work-stack with evicting clock.Clock base cache - bounds base-object memory so a zlib-bomb base can't OOM us - needed for parallelism - * Incremental thin completion: append one base then cascade, like git, - instead of batch collecting then cascade, maybe? 
- * Partial promote orphan cleanup; audit contract too - * Strict mode or otherwise structurally validate trees/commits/tags * SHA-1 collision detection - * Accept a context - * Multithreading - * Over-read footgun -* Object ownership - * how cloning objects works, how mutating objects from stored.Stored - behaves, who owns the memory object fields reference, and where copying - can be reduced. - * which object ID length mismatches should panic vs error +* Pack store maint, gc, etc + +* which object ID length mismatches should panic vs error diff --git a/cmd/doc.go b/cmd/doc.go new file mode 100644 index 00000000..119a6522 --- /dev/null +++ b/cmd/doc.go @@ -0,0 +1,3 @@ +// Package cmd provides some commands for testing furgit +// and inspecting git repositories. +package cmd diff --git a/cmd/explain-pack/delta.go b/cmd/explain-pack/delta.go new file mode 100644 index 00000000..d0437d51 --- /dev/null +++ b/cmd/explain-pack/delta.go @@ -0,0 +1,120 @@ +package main + +import ( + "fmt" +) + +func (explainer *explainer) walkDelta(base, payload []byte, pos int) ([]byte, bool, error) { + explainer.printf("\tdelta\n") + + building := base != nil + + var result []byte + + insn := 0 + + for pos < len(payload) { + op := payload[pos] + pos++ + insn++ + + switch { + case op&0x80 != 0: + next, seg, err := explainer.decodeCopy(base, payload, pos, op) + if err != nil { + return nil, false, err + } + + pos = next + + if building { + result = append(result, seg...) + } + case op != 0: + next, lit, err := explainer.decodeInsert(payload, pos, int(op)) + if err != nil { + return nil, false, err + } + + pos = next + + if building { + result = append(result, lit...) 
+ } + default: + explainer.printf("\t\tinvalid opcode 0x00; stopping delta decode\n") + + return nil, false, nil + } + } + + if !building { + return nil, false, nil + } + + return result, true, nil +} + +func (explainer *explainer) decodeCopy(base, payload []byte, pos int, op byte) (int, []byte, error) { + offset := 0 + + for i := range 4 { + if op&(1<<uint(i)) == 0 { //nolint:gosec + continue + } + + if pos >= len(payload) { + return 0, nil, fmt.Errorf("truncated copy offset") + } + + offset |= int(payload[pos]) << (8 * uint(i)) //nolint:gosec + pos++ + } + + size := 0 + + for i := range 3 { + if op&(1<<uint(4+i)) == 0 { //nolint:gosec + continue + } + + if pos >= len(payload) { + return 0, nil, fmt.Errorf("truncated copy size") + } + + size |= int(payload[pos]) << (8 * uint(i)) //nolint:gosec + pos++ + } + + if size == 0 { + size = 0x10000 + } + + explainer.printf("\t\tcpy %d from %d\n", size, offset) + + if base == nil { + return pos, nil, nil + } + + if offset < 0 || offset+size > len(base) { + return 0, nil, fmt.Errorf("copy of %d byte(s) from base offset %d exceeds base length %d", size, offset, len(base)) + } + + seg := base[offset : offset+size] + hexBlock(explainer.out, "\t\t\t", seg) + + return pos, seg, nil +} + +func (explainer *explainer) decodeInsert(payload []byte, pos, n int) (int, []byte, error) { + if pos+n > len(payload) { + return 0, nil, fmt.Errorf("truncated insert payload") + } + + lit := payload[pos : pos+n] + + explainer.printf("\t\tins %d\n", n) + hexBlock(explainer.out, "\t\t\t", lit) + + return pos + n, lit, nil +} diff --git a/cmd/explain-pack/doc.go b/cmd/explain-pack/doc.go new file mode 100644 index 00000000..f5fcc986 --- /dev/null +++ b/cmd/explain-pack/doc.go @@ -0,0 +1,10 @@ +// Command explain-pack reads a Git packfile and writes a +// human-readable explanation to stdout. +// +// With a pack filename argument +// the pack is mmap'd +// and a sibling .idx is used when present; +// with no argument the pack is read from stdin. 
+// A packfile does not record its object format, +// so the format must be given with -format. +package main diff --git a/cmd/explain-pack/entry.go b/cmd/explain-pack/entry.go new file mode 100644 index 00000000..0b796ef0 --- /dev/null +++ b/cmd/explain-pack/entry.go @@ -0,0 +1,257 @@ +package main + +import ( + "bytes" + "fmt" + "io" + + "lindenii.org/go/furgit/internal/compress/zlib" + "lindenii.org/go/furgit/internal/format/packfile" + "lindenii.org/go/furgit/internal/format/packfile/delta" + "lindenii.org/go/furgit/object/tree" + "lindenii.org/go/lgo/intconv" +) + +func (explainer *explainer) explainEntry(num, count, cursor int) (int, error) { + hashSize := explainer.objectFormat.Size() + + header, err := packfile.ParseEntryHeader(explainer.data[cursor:], hashSize) + if err != nil { + return 0, fmt.Errorf("entry %d at offset %d: %w", num, cursor, err) + } + + payloadStart := cursor + header.HeaderLen + if payloadStart > len(explainer.data) { + return 0, fmt.Errorf("entry %d at offset %d: header runs past the end of the pack", num, cursor) + } + + payload, consumed, err := inflateAt(explainer.data[payloadStart:]) + if err != nil { + return 0, fmt.Errorf("entry %d at offset %d: %w", num, cursor, err) + } + + next := payloadStart + consumed + + explainer.printf("object %d of %d\n", num, count) + explainer.printf("\tty\t%s\n", entryTypeLabel(header.Type)) + explainer.printf("\tofs\t%d\n", cursor) + explainer.printf("\thdrsz\t%d\n", header.HeaderLen) + explainer.printf("\tsz\t%d\n", header.Size) + + if uint64(len(payload)) != header.Size { + explainer.printf("\tnote\tdeclared %d byte(s) but inflated to %d\n", header.Size, len(payload)) + } + + if header.Type.IsBase() { + err = explainer.renderBase(cursor, header.Type, payload, consumed) + } else { + err = explainer.renderDelta(cursor, header, payload, consumed) + } + + if err != nil { + return 0, fmt.Errorf("entry %d at offset %d: %w", num, cursor, err) + } + + explainer.printf("\n") + + return next, nil +} + +func 
(explainer *explainer) renderBase(cursor int, entryType packfile.EntryType, content []byte, consumed int) error { + explainer.renderContent(entryType, content) + + explainer.printf("\tzlib\t%d\n", consumed) + + oid, err := explainer.recomputeOID(entryType, content) + if err != nil { + return err + } + + explainer.printf("\toid\t%s\n", oid) + + explainer.oidIndex[oid] = cursor + explainer.cache.Add(cursor, resolvedBase{entryType: entryType, content: content}) + + return nil +} + +func (explainer *explainer) renderDelta(cursor int, header packfile.EntryHeader, payload []byte, consumed int) error { + baseSize, resultSize, pos, err := delta.ParseHeaderSizes(payload) + if err != nil { + return fmt.Errorf("delta header: %w", err) + } + + err = explainer.renderBaseRef(cursor, header) + if err != nil { + return err + } + + explainer.printf("\tbasesz\t%d\n", baseSize) + explainer.printf("\tnewsz\t%d\n", resultSize) + + baseOffset, located, err := explainer.baseOffset(cursor, header) + if err != nil { + return err + } + + var ( + baseType packfile.EntryType + baseContent []byte + baseResolved bool + ) + + if located { + baseType, baseContent, baseResolved, err = explainer.reconstruct(baseOffset, 0) + if err != nil { + return err + } + } + + var walkBase []byte + if baseResolved { + walkBase = baseContent + } + + result, complete, err := explainer.walkDelta(walkBase, payload, pos) + if err != nil { + return err + } + + explainer.printf("\tzlib\t%d\n", consumed) + + switch { + case baseResolved && complete: + if uint64(len(result)) != resultSize { + explainer.printf("\tnote\tdelta produced %d byte(s) but declared %d\n", len(result), resultSize) + } + + explainer.renderContent(baseType, result) + + newOID, err := explainer.recomputeOID(baseType, result) + if err != nil { + return err + } + + explainer.printf("\tnewoid\t%s\n", newOID) + + explainer.oidIndex[newOID] = cursor + explainer.cache.Add(cursor, resolvedBase{entryType: baseType, content: result}) + case !baseResolved: + 
explainer.printf("\tnote\tbase not available in this pack; cannot reconstruct\n") + default: + explainer.printf("\tnote\tdelta decode incomplete; cannot reconstruct\n") + } + + return nil +} + +func (explainer *explainer) renderBaseRef(cursor int, header packfile.EntryHeader) error { + switch header.Type { + case packfile.EntryTypeOfsDelta: + dist, err := intconv.Uint64ToInt(header.OfsDistance) + if err != nil { + return fmt.Errorf("ofs-delta distance overflows int: %w", err) + } + + explainer.printf("\tbaseofs\t-%d = %d\n", dist, cursor-dist) + case packfile.EntryTypeRefDelta: + baseID, err := explainer.objectFormat.FromBytes(header.RefBase[:explainer.objectFormat.Size()]) + if err != nil { + return fmt.Errorf("ref-delta base ID: %w", err) + } + + explainer.printf("\tbaseoid\t%s\n", baseID) + case packfile.EntryTypeInvalid, + packfile.EntryTypeCommit, + packfile.EntryTypeTree, + packfile.EntryTypeBlob, + packfile.EntryTypeTag, + packfile.EntryTypeFuture: + } + + return nil +} + +func (explainer *explainer) renderContent(entryType packfile.EntryType, content []byte) { + switch entryType { + case packfile.EntryTypeCommit, packfile.EntryTypeTag: + explainer.printf("\tcontent\n") + indentBlock(explainer.out, "\t\t", content) + case packfile.EntryTypeTree: + explainer.renderTree(content) + case packfile.EntryTypeBlob, + packfile.EntryTypeOfsDelta, + packfile.EntryTypeRefDelta, + packfile.EntryTypeInvalid, + packfile.EntryTypeFuture: + explainer.printf("\thexdump\n") + hexBlock(explainer.out, "\t\t", content) + } +} + +func (explainer *explainer) renderTree(content []byte) { + parsed, err := tree.Parse(content, explainer.objectFormat) + if err != nil { + explainer.printf("\thexdump\t(not a valid tree: %v)\n", err) + hexBlock(explainer.out, "\t\t", content) + + return + } + + explainer.printf("\ttree\n") + + for _, entry := range parsed.Entries() { + mode := string(entry.Mode.Append(nil)) + explainer.printf( + "\t\t%s %s %s\t%s\n", + mode, entry.Mode.ObjectType().Name(), 
entry.ID, entry.Name, + ) + } +} + +func inflateAt(data []byte) ([]byte, int, error) { + reader := bytes.NewReader(data) + + zr, err := zlib.NewReader(reader) + if err != nil { + return nil, 0, fmt.Errorf("opening zlib stream: %w", err) + } + + content, err := io.ReadAll(zr) + closeErr := zr.Close() + + if err != nil { + return nil, 0, fmt.Errorf("inflating payload: %w", err) + } + + if closeErr != nil { + return nil, 0, fmt.Errorf("closing zlib stream: %w", closeErr) + } + + consumed := len(data) - reader.Len() + + return content, consumed, nil +} + +func entryTypeLabel(entryType packfile.EntryType) string { + switch entryType { + case packfile.EntryTypeCommit: + return "commit" + case packfile.EntryTypeTree: + return "tree" + case packfile.EntryTypeBlob: + return "blob" + case packfile.EntryTypeTag: + return "tag" + case packfile.EntryTypeOfsDelta: + return "ofs-delta" + case packfile.EntryTypeRefDelta: + return "ref-delta" + case packfile.EntryTypeInvalid: + return "invalid" + case packfile.EntryTypeFuture: + return "future" + default: + return fmt.Sprintf("unknown (%d)", entryType) + } +} diff --git a/cmd/explain-pack/fmt.go b/cmd/explain-pack/fmt.go new file mode 100644 index 00000000..a3d1b333 --- /dev/null +++ b/cmd/explain-pack/fmt.go @@ -0,0 +1,30 @@ +package main + +import ( + "bytes" + "encoding/hex" + "io" + + "lindenii.org/go/furgit/internal/utils" +) + +func indentBlock(out io.Writer, indent string, block []byte) { + lines := bytes.Split(block, []byte("\n")) + if n := len(lines); n > 0 && len(lines[n-1]) == 0 { + lines = lines[:n-1] + } + + for _, line := range lines { + utils.BestEffortFprintf(out, "%s%s\n", indent, line) + } +} + +func hexBlock(out io.Writer, indent string, data []byte) { + var buf bytes.Buffer + + dumper := hex.Dumper(&buf) + _, _ = dumper.Write(data) + _ = dumper.Close() + + indentBlock(out, indent, buf.Bytes()) +} diff --git a/cmd/explain-pack/main.go b/cmd/explain-pack/main.go new file mode 100644 index 00000000..af5b7480 --- 
/dev/null +++ b/cmd/explain-pack/main.go @@ -0,0 +1,240 @@ +package main + +import ( + "bufio" + "bytes" + "encoding/hex" + "flag" + "fmt" + "io" + "os" + "strings" + + "lindenii.org/go/furgit/internal/format/packfile" + "lindenii.org/go/furgit/internal/format/packidx" + "lindenii.org/go/furgit/internal/mmap" + "lindenii.org/go/furgit/internal/utils" + "lindenii.org/go/furgit/object/id" +) + +func main() { + format := flag.String("format", "", "object format of the pack: sha1 or sha256 (required)") + + flag.Parse() + + err := run(*format, flag.Args(), os.Stdin, os.Stdout) + if err != nil { + fmt.Fprintln(os.Stderr, "explain-pack:", err) + os.Exit(1) + } +} + +type explainer struct { + data []byte + objectFormat id.ObjectFormat + out *bufio.Writer + + idx *packidx.Packidx + + cache *baseCache + oidIndex map[id.ObjectID]int +} + +func run(format string, args []string, stdin io.Reader, stdout io.Writer) error { + if format == "" { + return fmt.Errorf("the -format flag is required (sha1 or sha256)") + } + + objectFormat, err := id.ParseObjectFormat(format) + if err != nil { + return fmt.Errorf("invalid -format %q: %w", format, err) + } + + if len(args) > 1 { + return fmt.Errorf("at most one pack file argument is accepted, got %d", len(args)) + } + + data, idx, closers, err := openInput(args, objectFormat, stdin) + if err != nil { + return err + } + + defer func() { + for _, c := range closers { + _ = c.Close() + } + }() + + out := bufio.NewWriter(stdout) + + explainer := &explainer{ + data: data, + objectFormat: objectFormat, + out: out, + idx: idx, + cache: newBaseCache(), + oidIndex: make(map[id.ObjectID]int), + } + + err = explainer.explain() + if err != nil { + return err + } + + return out.Flush() +} + +func openInput(args []string, objectFormat id.ObjectFormat, stdin io.Reader) ([]byte, *packidx.Packidx, []io.Closer, error) { + if len(args) == 0 { + data, err := io.ReadAll(stdin) + if err != nil { + return nil, nil, nil, fmt.Errorf("reading pack from stdin: %w", 
err) + } + + return data, nil, nil, nil + } + + packPath := args[0] + + packMapping, err := mapPath(packPath) + if err != nil { + return nil, nil, nil, err + } + + closers := []io.Closer{packMapping} + + idx, idxMapping, err := openIndex(packPath, objectFormat) + if err != nil { + _ = packMapping.Close() + + return nil, nil, nil, err + } + + if idxMapping != nil { + closers = append(closers, idxMapping) + } + + return packMapping.Data(), idx, closers, nil +} + +func openIndex(packPath string, objectFormat id.ObjectFormat) (*packidx.Packidx, *mmap.Mmap, error) { + idxPath := strings.TrimSuffix(packPath, ".pack") + ".idx" + + file, err := os.Open(idxPath) //#nosec G304 + if err != nil { + if os.IsNotExist(err) { + return nil, nil, nil + } + + return nil, nil, fmt.Errorf("opening index %q: %w", idxPath, err) + } + + defer func() { _ = file.Close() }() + + mapping, err := mmap.Open(file) + if err != nil { + return nil, nil, fmt.Errorf("mapping index %q: %w", idxPath, err) + } + + idx, err := packidx.Parse(mapping.Data(), objectFormat.Size()) + if err != nil { + _ = mapping.Close() + + return nil, nil, fmt.Errorf("parsing index %q: %w", idxPath, err) + } + + return &idx, mapping, nil +} + +func mapPath(path string) (*mmap.Mmap, error) { + file, err := os.Open(path) //#nosec G304 + if err != nil { + return nil, fmt.Errorf("opening pack %q: %w", path, err) + } + + defer func() { _ = file.Close() }() + + mapping, err := mmap.Open(file) + if err != nil { + return nil, fmt.Errorf("mapping pack %q: %w", path, err) + } + + return mapping, nil +} + +func (explainer *explainer) printf(format string, args ...any) { + utils.BestEffortFprintf(explainer.out, format, args...) 
+} + +func (explainer *explainer) explain() error { + hashSize := explainer.objectFormat.Size() + + if len(explainer.data) < packfile.HeaderLen+hashSize { + return fmt.Errorf("pack is too short to contain a header and a %d-byte trailer", hashSize) + } + + count, err := explainer.explainHeader() + if err != nil { + return err + } + + cursor := packfile.HeaderLen + + for num := 1; num <= count; num++ { + next, err := explainer.explainEntry(num, count, cursor) + if err != nil { + return err + } + + cursor = next + } + + return explainer.explainTrailer(cursor) +} + +func (explainer *explainer) explainHeader() (int, error) { + header, err := packfile.ParseHeader(explainer.data[:packfile.HeaderLen]) + if err != nil { + return 0, fmt.Errorf("pack header: %w", err) + } + + explainer.printf("pack header\n") + explainer.printf("\tmagic\t\"PACK\"\n") + explainer.printf("\tversion\t2\n") + explainer.printf("\tobjects\t%d\n", header.ObjectCount) + explainer.printf("\n") + + return int(header.ObjectCount), nil +} + +func (explainer *explainer) explainTrailer(cursor int) error { + hashSize := explainer.objectFormat.Size() + trailerStart := len(explainer.data) - hashSize + + if cursor != trailerStart { + explainer.printf( + "note\t%d byte(s) between the last entry and the trailer were unaccounted for\n", + trailerStart-cursor, + ) + } + + trailer := explainer.data[trailerStart:] + + explainer.printf("pack trailer\n") + explainer.printf("\tchecksum\t%s\n", hex.EncodeToString(trailer)) + + hashImpl, err := explainer.objectFormat.New() + if err != nil { + return fmt.Errorf("object/store: %w", err) + } + + _, _ = hashImpl.Write(explainer.data[:trailerStart]) + + if bytes.Equal(hashImpl.Sum(nil), trailer) { + explainer.printf("\trecomputed\tmatches\n") + } else { + explainer.printf("\trecomputed\tMISMATCH (corrupt pack or wrong -format)\n") + } + + return nil +} diff --git a/cmd/explain-pack/resolve.go b/cmd/explain-pack/resolve.go new file mode 100644 index 00000000..4396fe19 --- 
/dev/null +++ b/cmd/explain-pack/resolve.go @@ -0,0 +1,161 @@ +package main + +import ( + "fmt" + + "lindenii.org/go/furgit/internal/cache/clock" + "lindenii.org/go/furgit/internal/format/packfile" + "lindenii.org/go/furgit/internal/format/packfile/delta" + "lindenii.org/go/furgit/object/header" + "lindenii.org/go/furgit/object/id" + "lindenii.org/go/lgo/intconv" +) + +const baseCacheMaxWeight = 64 << 20 + +type resolvedBase struct { + entryType packfile.EntryType + content []byte +} + +type baseCache = clock.Clock[int, resolvedBase] + +func newBaseCache() *baseCache { + return clock.New(baseCacheMaxWeight, func(_ int, base resolvedBase) uint64 { + return uint64(len(base.content)) + 32 + }) +} + +func (explainer *explainer) reconstruct(offset, depth int) (packfile.EntryType, []byte, bool, error) { + var zero packfile.EntryType + + if depth > delta.MaxChainDepth { + return zero, nil, false, fmt.Errorf("delta chain too deep at offset %d", offset) + } + + if cached, ok := explainer.cache.Get(offset); ok { + return cached.entryType, cached.content, true, nil + } + + header, err := packfile.ParseEntryHeader(explainer.data[offset:], explainer.objectFormat.Size()) + if err != nil { + return zero, nil, false, fmt.Errorf("entry at offset %d: %w", offset, err) + } + + payloadStart := offset + header.HeaderLen + if payloadStart > len(explainer.data) { + return zero, nil, false, fmt.Errorf("entry at offset %d: header runs past end of pack", offset) + } + + if header.Type.IsBase() { + content, _, err := inflateAt(explainer.data[payloadStart:]) + if err != nil { + return zero, nil, false, fmt.Errorf("entry at offset %d: %w", offset, err) + } + + explainer.cache.Add(offset, resolvedBase{entryType: header.Type, content: content}) + + return header.Type, content, true, nil + } + + baseOffset, ok, err := explainer.baseOffset(offset, header) + if err != nil { + return zero, nil, false, err + } + + if !ok { + return zero, nil, false, nil + } + + baseType, baseContent, ok, err := 
explainer.reconstruct(baseOffset, depth+1) + if err != nil || !ok { + return zero, nil, ok, err + } + + payload, _, err := inflateAt(explainer.data[payloadStart:]) + if err != nil { + return zero, nil, false, fmt.Errorf("entry at offset %d: %w", offset, err) + } + + content, err := delta.Apply(baseContent, payload) + if err != nil { + return zero, nil, false, fmt.Errorf("entry at offset %d: %w", offset, err) + } + + explainer.cache.Add(offset, resolvedBase{entryType: baseType, content: content}) + + return baseType, content, true, nil +} + +func (explainer *explainer) baseOffset(offset int, header packfile.EntryHeader) (int, bool, error) { + switch header.Type { + case packfile.EntryTypeOfsDelta: + dist, err := intconv.Uint64ToInt(header.OfsDistance) + if err != nil || dist <= 0 || dist > offset { + return 0, false, fmt.Errorf("entry at offset %d: ofs-delta base out of bounds", offset) + } + + return offset - dist, true, nil + case packfile.EntryTypeRefDelta: + refBytes := header.RefBase[:explainer.objectFormat.Size()] + + if explainer.idx != nil { + baseOffsetU, found, err := explainer.idx.Lookup(refBytes) + if err != nil { + return 0, false, fmt.Errorf("entry at offset %d: index lookup: %w", offset, err) + } + + if found { + baseOffset, err := intconv.Uint64ToInt(baseOffsetU) + if err != nil { + return 0, false, fmt.Errorf("entry at offset %d: index base offset overflows int: %w", offset, err) + } + + return baseOffset, true, nil + } + } + + baseID, err := explainer.objectFormat.FromBytes(refBytes) + if err != nil { + return 0, false, fmt.Errorf("entry at offset %d: %w", offset, err) + } + + if baseOffset, found := explainer.oidIndex[baseID]; found { + return baseOffset, true, nil + } + + return 0, false, nil + case packfile.EntryTypeInvalid, + packfile.EntryTypeCommit, + packfile.EntryTypeTree, + packfile.EntryTypeBlob, + packfile.EntryTypeTag, + packfile.EntryTypeFuture: + } + + return 0, false, fmt.Errorf("entry at offset %d: not a delta entry", offset) +} + 
+func (explainer *explainer) recomputeOID(entryType packfile.EntryType, content []byte) (id.ObjectID, error) { + var zero id.ObjectID + + objectType, err := entryType.ObjectType() + if err != nil { + return zero, err + } + + hashImpl, err := explainer.objectFormat.New() + if err != nil { + return zero, err + } + + _, _ = hashImpl.Write(header.Append(nil, objectType, len(content))) + _, _ = hashImpl.Write(content) + + oid, err := explainer.objectFormat.FromBytes(hashImpl.Sum(nil)) + if err != nil { + return zero, err + } + + return oid, nil +} diff --git a/cmd/idx-bloom/doc.go b/cmd/idx-bloom/doc.go new file mode 100644 index 00000000..e7d4e818 --- /dev/null +++ b/cmd/idx-bloom/doc.go @@ -0,0 +1,8 @@ +// Command idx-bloom reads a Git pack index +// and writes an IDBL Bloom filter over its object IDs to stdout. +// +// With an index filename argument the index is read from that file; +// with no argument it is read from stdin. +// A pack index does not record its object format, +// so the format must be given with -format. 
+package main diff --git a/cmd/idx-bloom/main.go b/cmd/idx-bloom/main.go new file mode 100644 index 00000000..fa471237 --- /dev/null +++ b/cmd/idx-bloom/main.go @@ -0,0 +1,99 @@ +package main + +import ( + "flag" + "fmt" + "io" + "os" + + "lindenii.org/go/furgit/internal/format/packidx" + "lindenii.org/go/furgit/internal/format/packidx/bloom" + "lindenii.org/go/furgit/object/id" +) + +func main() { + format := flag.String("format", "", "object format of the index: sha1 or sha256 (required)") + + flag.Parse() + + err := run(*format, flag.Args(), os.Stdin, os.Stdout) + if err != nil { + fmt.Fprintln(os.Stderr, "idx-bloom:", err) + os.Exit(1) + } +} + +func run(format string, args []string, stdin io.Reader, stdout io.Writer) error { + if format == "" { + return fmt.Errorf("the -format flag is required (sha1 or sha256)") + } + + objectFormat, err := id.ParseObjectFormat(format) + if err != nil { + return fmt.Errorf("invalid -format %q: %w", format, err) + } + + if len(args) > 1 { + return fmt.Errorf("at most one index file argument is accepted, got %d", len(args)) + } + + data, err := readInput(args, stdin) + if err != nil { + return err + } + + index, err := packidx.Parse(data, objectFormat.Size()) + if err != nil { + return fmt.Errorf("parsing index: %w", err) + } + + filter, err := buildFilter(objectFormat, &index) + if err != nil { + return err + } + + _, err = stdout.Write(filter) + if err != nil { + return fmt.Errorf("writing filter: %w", err) + } + + return nil +} + +func readInput(args []string, stdin io.Reader) ([]byte, error) { + if len(args) == 0 { + data, err := io.ReadAll(stdin) + if err != nil { + return nil, fmt.Errorf("reading index from stdin: %w", err) + } + + return data, nil + } + + data, err := os.ReadFile(args[0]) //#nosec G304 + if err != nil { + return nil, fmt.Errorf("reading index %q: %w", args[0], err) + } + + return data, nil +} + +func buildFilter(objectFormat id.ObjectFormat, index *packidx.Packidx) ([]byte, error) { + objects := 
index.NumObjects() + + bucketCount, k, err := bloom.RecommendParams(objectFormat, objects) + if err != nil { + return nil, fmt.Errorf("choosing parameters: %w", err) + } + + builder, err := bloom.NewBuilder(objectFormat, bucketCount, k, index.PackHash()) + if err != nil { + return nil, fmt.Errorf("creating builder: %w", err) + } + + for pos := range objects { + builder.Add(index.OIDAt(pos)) + } + + return builder.Bytes(), nil +} @@ -2,4 +2,7 @@ module lindenii.org/go/furgit go 1.26.0 -require lindenii.org/go/lgo v0.1.10 +require ( + github.com/klauspost/compress v1.18.6 + lindenii.org/go/lgo v0.1.14 +) @@ -1,2 +1,4 @@ -lindenii.org/go/lgo v0.1.10 h1:UdixxhTB2cLGu2PSU+hMf9/T6Yc/2MNUGUZRsd2wLY8= -lindenii.org/go/lgo v0.1.10/go.mod h1:/ISiIVk/j7UFbnTLni1VA8cc2TJWa/WRqJPc3BStwn8= +github.com/klauspost/compress v1.18.6 h1:2jupLlAwFm95+YDR+NwD2MEfFO9d4z4Prjl1XXDjuao= +github.com/klauspost/compress v1.18.6/go.mod h1:cwPg85FWrGar70rWktvGQj8/hthj3wpl0PGDogxkrSQ= +lindenii.org/go/lgo v0.1.14 h1:n4+0iZKe0Ro/0OFdgjdYHPjy1kxHskyMF9miEzxRZWM= +lindenii.org/go/lgo v0.1.14/go.mod h1:/ISiIVk/j7UFbnTLni1VA8cc2TJWa/WRqJPc3BStwn8= diff --git a/internal/adler32/adler32_generic.go b/internal/adler32/adler32_generic.go index 08ab483f..f6d53f5c 100644 --- a/internal/adler32/adler32_generic.go +++ b/internal/adler32/adler32_generic.go @@ -5,19 +5,19 @@ const ( Size = 4 // mod is the largest prime that is less than 65536. - mod = 65521 + mod = 65521 //nolint:unused // nmax is the largest n such that // 255 * n * (n+1) / 2 + (n+1) * (mod-1) <= 2^32-1. // It is mentioned in RFC 1950 (search for "5552"). - nmax = 5552 + nmax = 5552 //nolint:unused // binary representation compatible with standard library. - magic = "adl\x01" - marshaledSize = len(magic) + 4 + magic = "adl\x01" //nolint:unused + marshaledSize = len(magic) + 4 //nolint:unused ) // Add p to the running checksum d. 
-func update(d uint32, p []byte) uint32 { +func update(d uint32, p []byte) uint32 { //nolint:unused s1, s2 := d&0xffff, d>>16 for len(p) > 0 { diff --git a/internal/cache/clock/clock_ops.go b/internal/cache/clock/clock_ops.go index a21f44c3..6d4785f4 100644 --- a/internal/cache/clock/clock_ops.go +++ b/internal/cache/clock/clock_ops.go @@ -10,15 +10,11 @@ func (clock *Clock[K, V]) Add(key K, value V) bool { } // Get returns the value for key and marks it recently used. -// -//nolint:ireturn func (clock *Clock[K, V]) Get(key K) (V, bool) { return clock.shardFor(key).get(key) } // Peek returns the value for key without changing its recency. -// -//nolint:ireturn func (clock *Clock[K, V]) Peek(key K) (V, bool) { return clock.shardFor(key).peek(key) } diff --git a/internal/cache/clock/shard_read.go b/internal/cache/clock/shard_read.go index 624e3409..279f9725 100644 --- a/internal/cache/clock/shard_read.go +++ b/internal/cache/clock/shard_read.go @@ -1,8 +1,6 @@ package clock // get returns the value for key and marks it referenced. -// -//nolint:ireturn func (shard *shard[K, V]) get(key K) (V, bool) { e, ok := shard.items.Load(key) if !ok { @@ -19,8 +17,6 @@ func (shard *shard[K, V]) get(key K) (V, bool) { } // peek returns the value for key without affecting eviction. 
-// -//nolint:ireturn func (shard *shard[K, V]) peek(key K) (V, bool) { e, ok := shard.items.Load(key) if !ok { diff --git a/internal/compress/flate/_gen/gen_inflate.go b/internal/compress/flate/_gen/gen_inflate.go deleted file mode 100644 index 33f14005..00000000 --- a/internal/compress/flate/_gen/gen_inflate.go +++ /dev/null @@ -1,303 +0,0 @@ -//go:build generate -// +build generate - -//go:generate go run $GOFILE -//go:generate go fmt ../inflate_gen.go - -package main - -import ( - "os" - "strings" -) - -func main() { - f, err := os.Create("../inflate_gen.go") - if err != nil { - panic(err) - } - defer f.Close() - types := []string{"*bytes.Buffer", "*bytes.Reader", "*bufio.Reader", "*strings.Reader", "Reader"} - names := []string{"BytesBuffer", "BytesReader", "BufioReader", "StringsReader", "GenericReader"} - imports := []string{"bytes", "bufio", "fmt", "strings", "math/bits"} - f.WriteString(`// Code generated by go generate gen_inflate.go. DO NOT EDIT. - -package flate - -import ( -`) - - for _, imp := range imports { - f.WriteString("\t\"" + imp + "\"\n") - } - f.WriteString(")\n\n") - - template := ` - -// Decode a single Huffman block from f. -// hl and hd are the Huffman states for the lit/length values -// and the distance values, respectively. If hd == nil, using the -// fixed distance encoding associated with fixed Huffman blocks. -func (f *decompressor) $FUNCNAME$() { - const ( - stateInit = iota // Zero value must be stateInit - stateDict - ) - fr := f.r.($TYPE$) - - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - fnb, fb, dict := f.nb, f.b, &f.dict - - switch f.stepState { - case stateInit: - goto readLiteral - case stateDict: - goto copyHistory - } - -readLiteral: - // Read literal and/or (length, distance) according to RFC section 3.2.3. 
- { - var v int - { - // Inlined v, err := f.huffSym(f.hl) - // Since a huffmanDecoder can be empty or be composed of a degenerate tree - // with single element, huffSym must error on these two edge cases. In both - // cases, the chunks slice will be 0 for the invalid sequence, leading it - // satisfy the n == 0 check below. - n := uint(f.hl.maxRead) - for { - for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - f.err = noEOF(err) - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= fnb { - if n == 0 { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("huffsym: n==0") - } - f.err = CorruptInputError(f.roffset) - return - } - fb = fb >> (n & regSizeMaskUint32) - fnb = fnb - n - v = int(chunk >> huffmanValueShift) - break - } - } - } - - var length int - switch { - case v < 256: - dict.writeByte(byte(v)) - if dict.availWrite() == 0 { - f.toRead = dict.readFlush() - f.step = $FUNCNAME$ - f.stepState = stateInit - f.b, f.nb = fb, fnb - return - } - goto readLiteral - case v == 256: - f.b, f.nb = fb, fnb - f.finishBlock() - return - // otherwise, reference to older data - case v < 265: - length = v - (257 - 3) - case v < maxNumLit: - val := decCodeToLen[(v - 257)] - length = int(val.length) + 3 - n := uint(val.extra) - for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits n>0:", err) - } - f.err = err - return - } - f.roffset++ - fb |= uint32(c) << (fnb&regSizeMaskUint32) - fnb += 8 - } - length += int(fb & bitMask32[n]) - fb >>= n & regSizeMaskUint32 - fnb -= n - default: - if debugDecode { - fmt.Println(v, ">= maxNumLit") - } - f.err = CorruptInputError(f.roffset) - f.b, f.nb = fb, fnb - 
return - } - - var dist uint32 - if f.hd == nil { - for fnb < 5 { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits f.nb<5:", err) - } - f.err = err - return - } - f.roffset++ - fb |= uint32(c) << (fnb&regSizeMaskUint32) - fnb += 8 - } - dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) - fb >>= 5 - fnb -= 5 - } else { - // Since a huffmanDecoder can be empty or be composed of a degenerate tree - // with single element, huffSym must error on these two edge cases. In both - // cases, the chunks slice will be 0 for the invalid sequence, leading it - // satisfy the n == 0 check below. - n := uint(f.hd.maxRead) - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - for { - for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - f.err = noEOF(err) - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= fnb { - if n == 0 { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("huffsym: n==0") - } - f.err = CorruptInputError(f.roffset) - return - } - fb = fb >> (n & regSizeMaskUint32) - fnb = fnb - n - dist = uint32(chunk >> huffmanValueShift) - break - } - } - } - - switch { - case dist < 4: - dist++ - case dist < maxNumDist: - nb := uint(dist-2) >> 1 - // have 1 bit in bottom of dist, need nb more.
- extra := (dist & 1) << (nb & regSizeMaskUint32) - for fnb < nb { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits f.nb<nb:", err) - } - f.err = err - return - } - f.roffset++ - fb |= uint32(c) << (fnb&regSizeMaskUint32) - fnb += 8 - } - extra |= fb & bitMask32[nb] - fb >>= nb & regSizeMaskUint32 - fnb -= nb - dist = 1<<((nb+1)&regSizeMaskUint32) + 1 + extra - // slower: dist = bitMask32[nb+1] + 2 + extra - default: - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("dist too big:", dist, maxNumDist) - } - f.err = CorruptInputError(f.roffset) - return - } - - // No check on length; encoding can be prescient. - if dist > uint32(dict.histSize()) { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("dist > dict.histSize():", dist, dict.histSize()) - } - f.err = CorruptInputError(f.roffset) - return - } - - f.copyLen, f.copyDist = length, int(dist) - goto copyHistory - } - -copyHistory: - // Perform a backwards copy according to RFC section 3.2.3.
- { - cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) - if cnt == 0 { - cnt = dict.writeCopy(f.copyDist, f.copyLen) - } - f.copyLen -= cnt - - if dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = dict.readFlush() - f.step = $FUNCNAME$ // We need to continue this work - f.stepState = stateDict - f.b, f.nb = fb, fnb - return - } - goto readLiteral - } - // Not reached -} - -` - for i, t := range types { - s := strings.Replace(template, "$FUNCNAME$", "huffman"+names[i], -1) - s = strings.Replace(s, "$TYPE$", t, -1) - f.WriteString(s) - } - f.WriteString("func (f *decompressor) huffmanBlockDecoder() {\n") - f.WriteString("\tswitch f.r.(type) {\n") - for i, t := range types { - f.WriteString("\t\tcase " + t + ":\n") - f.WriteString("\t\t\tf.huffman" + names[i] + "()\n") - } - f.WriteString("\t\tdefault:\n") - f.WriteString("\t\t\tf.huffmanGenericReader()\n") - f.WriteString("\t}\n}\n") -} diff --git a/internal/compress/flate/deflate.go b/internal/compress/flate/deflate.go deleted file mode 100644 index 8c8457e2..00000000 --- a/internal/compress/flate/deflate.go +++ /dev/null @@ -1,996 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Copyright (c) 2015 Klaus Post -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package flate - -import ( - "errors" - "fmt" - "io" - "math" - - "lindenii.org/go/furgit/internal/compress/internal/le" -) - -const ( - NoCompression = 0 - BestSpeed = 1 - BestCompression = 9 - DefaultCompression = -1 - - // HuffmanOnly disables Lempel-Ziv match searching and only performs Huffman - // entropy encoding. This mode is useful in compressing data that has - // already been compressed with an LZ style algorithm (e.g. Snappy or LZ4) - // that lacks an entropy encoder. Compression gains are achieved when - // certain bytes in the input stream occur more frequently than others. - // - // Note that HuffmanOnly produces a compressed output that is - // RFC 1951 compliant. 
That is, any valid DEFLATE decompressor will - // continue to be able to decompress this output. - HuffmanOnly = -2 - ConstantCompression = HuffmanOnly // compatibility alias. - - logWindowSize = 15 - windowSize = 1 << logWindowSize - windowMask = windowSize - 1 - logMaxOffsetSize = 15 // Standard DEFLATE - minMatchLength = 4 // The smallest match that the compressor looks for - maxMatchLength = 258 // The longest match for the compressor - minOffsetSize = 1 // The shortest offset that makes any sense - - // The maximum number of tokens we will encode at the time. - // Smaller sizes usually creates less optimal blocks. - // Bigger can make context switching slow. - // We use this for levels 7-9, so we make it big. - maxFlateBlockTokens = 1 << 15 - maxStoreBlockSize = 65535 - hashBits = 17 // After 17 performance degrades - hashSize = 1 << hashBits - hashMask = (1 << hashBits) - 1 - hashShift = (hashBits + minMatchLength - 1) / minMatchLength - maxHashOffset = 1 << 28 - - skipNever = math.MaxInt32 - - debugDeflate = false -) - -type compressionLevel struct { - good, lazy, nice, chain, fastSkipHashing, level int -} - -// Compression levels have been rebalanced from zlib deflate defaults -// to give a bigger spread in speed and compression. -// See https://blog.klauspost.com/rebalancing-deflate-compression-levels/ -var levels = []compressionLevel{ - {}, // 0 - // Level 1-6 uses specialized algorithm - values not used - {0, 0, 0, 0, 0, 1}, - {0, 0, 0, 0, 0, 2}, - {0, 0, 0, 0, 0, 3}, - {0, 0, 0, 0, 0, 4}, - {0, 0, 0, 0, 0, 5}, - {0, 0, 0, 0, 0, 6}, - // Levels 7-9 use increasingly more lazy matching - // and increasingly stringent conditions for "good enough". - {8, 12, 16, 24, skipNever, 7}, - {16, 30, 40, 64, skipNever, 8}, - {32, 258, 258, 1024, skipNever, 9}, -} - -// advancedState contains state for the advanced levels, with bigger hash tables, etc. 
-type advancedState struct { - // deflate state - length int - offset int - maxInsertIndex int - chainHead int - hashOffset int - - ii uint16 // position of last match, intended to overflow to reset. - - // input window: unprocessed data is window[index:windowEnd] - index int - hashMatch [maxMatchLength + minMatchLength]uint32 - - // Input hash chains - // hashHead[hashValue] contains the largest inputIndex with the specified hash value - // If hashHead[hashValue] is within the current window, then - // hashPrev[hashHead[hashValue] & windowMask] contains the previous index - // with the same hash value. - hashHead [hashSize]uint32 - hashPrev [windowSize]uint32 -} - -type compressor struct { - compressionLevel - - h *huffmanEncoder - w *huffmanBitWriter - - // compression algorithm - fill func(*compressor, []byte) int // copy data to window - step func(*compressor) // process window - - window []byte - windowEnd int - blockStart int // window index where current tokens start - err error - - // queued output tokens - tokens tokens - fast fastEnc - state *advancedState - - sync bool // requesting flush - byteAvailable bool // if true, still need to process window[index-1]. -} - -func (d *compressor) fillDeflate(b []byte) int { - s := d.state - if s.index >= 2*windowSize-(minMatchLength+maxMatchLength) { - // shift the window by windowSize - // copy(d.window[:], d.window[windowSize:2*windowSize]) - *(*[windowSize]byte)(d.window) = *(*[windowSize]byte)(d.window[windowSize:]) - s.index -= windowSize - d.windowEnd -= windowSize - if d.blockStart >= windowSize { - d.blockStart -= windowSize - } else { - d.blockStart = math.MaxInt32 - } - s.hashOffset += windowSize - if s.hashOffset > maxHashOffset { - delta := s.hashOffset - 1 - s.hashOffset -= delta - s.chainHead -= delta - // Iterate over slices instead of arrays to avoid copying - // the entire table onto the stack (Issue #18625). 
- for i, v := range s.hashPrev[:] { - if int(v) > delta { - s.hashPrev[i] = uint32(int(v) - delta) - } else { - s.hashPrev[i] = 0 - } - } - for i, v := range s.hashHead[:] { - if int(v) > delta { - s.hashHead[i] = uint32(int(v) - delta) - } else { - s.hashHead[i] = 0 - } - } - } - } - n := copy(d.window[d.windowEnd:], b) - d.windowEnd += n - return n -} - -func (d *compressor) writeBlock(tok *tokens, index int, eof bool) error { - if index > 0 || eof { - var window []byte - if d.blockStart <= index { - window = d.window[d.blockStart:index] - } - d.blockStart = index - // d.w.writeBlock(tok, eof, window) - d.w.writeBlockDynamic(tok, eof, window, d.sync) - return d.w.err - } - return nil -} - -// writeBlockSkip writes the current block and uses the number of tokens -// to determine if the block should be stored on no matches, or -// only huffman encoded. -func (d *compressor) writeBlockSkip(tok *tokens, index int, eof bool) error { - if index > 0 || eof { - if d.blockStart <= index { - window := d.window[d.blockStart:index] - // If we removed less than a 64th of all literals - // we huffman compress the block. - if int(tok.n) > len(window)-int(tok.n>>6) { - d.w.writeBlockHuff(eof, window, d.sync) - } else { - // Write a dynamic huffman block. - d.w.writeBlockDynamic(tok, eof, window, d.sync) - } - } else { - d.w.writeBlock(tok, eof, nil) - } - d.blockStart = index - return d.w.err - } - return nil -} - -// fillWindow will fill the current window with the supplied -// dictionary and calculate all hashes. -// This is much faster than doing a full encode. -// Should only be used after a start/reset. -func (d *compressor) fillWindow(b []byte) { - // Do not fill window if we are in store-only or huffman mode. 
- if d.level <= 0 && d.level > -MinCustomWindowSize { - return - } - if d.fast != nil { - // encode the last data, but discard the result - if len(b) > maxMatchOffset { - b = b[len(b)-maxMatchOffset:] - } - d.fast.Encode(&d.tokens, b) - d.tokens.Reset() - return - } - s := d.state - // If we are given too much, cut it. - if len(b) > windowSize { - b = b[len(b)-windowSize:] - } - // Add all to window. - n := copy(d.window[d.windowEnd:], b) - - // Calculate 256 hashes at the time (more L1 cache hits) - loops := (n + 256 - minMatchLength) / 256 - for j := range loops { - startindex := j * 256 - end := min(startindex+256+minMatchLength-1, n) - tocheck := d.window[startindex:end] - dstSize := len(tocheck) - minMatchLength + 1 - - if dstSize <= 0 { - continue - } - - dst := s.hashMatch[:dstSize] - bulkHash4(tocheck, dst) - var newH uint32 - for i, val := range dst { - di := i + startindex - newH = val & hashMask - // Get previous value with the same hash. - // Our chain should point to the previous value. - s.hashPrev[di&windowMask] = s.hashHead[newH] - // Set the head of the hash chain to us. - s.hashHead[newH] = uint32(di + s.hashOffset) - } - } - // Update window information. - d.windowEnd += n - s.index = n -} - -// Try to find a match starting at index whose length is greater than prevSize. -// We only look at chainCount possibilities before giving up. -// pos = s.index, prevHead = s.chainHead-s.hashOffset, prevLength=minMatchLength-1, lookahead -func (d *compressor) findMatch(pos int, prevHead int, lookahead int) (length, offset int, ok bool) { - minMatchLook := min(lookahead, maxMatchLength) - - win := d.window[0 : pos+minMatchLook] - - // We quit when we get a match that's at least nice long - nice := min(d.nice, len(win)-pos) - - // If we've got a match that's good enough, only look in 1/4 the chain. 
- tries := d.chain - length = minMatchLength - 1 - - wEnd := win[pos+length] - wPos := win[pos:] - minIndex := max(pos-windowSize, 0) - offset = 0 - - if d.chain < 100 { - for i := prevHead; tries > 0; tries-- { - if wEnd == win[i+length] { - n := matchLen(win[i:i+minMatchLook], wPos) - if n > length { - length = n - offset = pos - i - ok = true - if n >= nice { - // The match is good enough that we don't try to find a better one. - break - } - wEnd = win[pos+n] - } - } - if i <= minIndex { - // hashPrev[i & windowMask] has already been overwritten, so stop now. - break - } - i = int(d.state.hashPrev[i&windowMask]) - d.state.hashOffset - if i < minIndex { - break - } - } - return - } - - // Minimum gain to accept a match. - cGain := 4 - - // Some like it higher (CSV), some like it lower (JSON) - const baseCost = 3 - // Base is 4 bytes at with an additional cost. - // Matches must be better than this. - - for i := prevHead; tries > 0; tries-- { - if wEnd == win[i+length] { - n := matchLen(win[i:i+minMatchLook], wPos) - if n > length { - // Calculate gain. Estimate - newGain := d.h.bitLengthRaw(wPos[:n]) - int(offsetExtraBits[offsetCode(uint32(pos-i))]) - baseCost - int(lengthExtraBits[lengthCodes[(n-3)&255]]) - - // fmt.Println("gain:", newGain, "prev:", cGain, "raw:", d.h.bitLengthRaw(wPos[:n]), "this-len:", n, "prev-len:", length) - if newGain > cGain { - length = n - offset = pos - i - cGain = newGain - ok = true - if n >= nice { - // The match is good enough that we don't try to find a better one. - break - } - wEnd = win[pos+n] - } - } - } - if i <= minIndex { - // hashPrev[i & windowMask] has already been overwritten, so stop now. 
- break - } - i = int(d.state.hashPrev[i&windowMask]) - d.state.hashOffset - if i < minIndex { - break - } - } - return -} - -func (d *compressor) writeStoredBlock(buf []byte) error { - if d.w.writeStoredHeader(len(buf), false); d.w.err != nil { - return d.w.err - } - d.w.writeBytes(buf) - return d.w.err -} - -// hash4 returns a hash representation of the first 4 bytes -// of the supplied slice. -// The caller must ensure that len(b) >= 4. -func hash4(b []byte) uint32 { - return hash4u(le.Load32(b, 0), hashBits) -} - -// hash4 returns the hash of u to fit in a hash table with h bits. -// Preferably h should be a constant and should always be <32. -func hash4u(u uint32, h uint8) uint32 { - return (u * prime4bytes) >> (32 - h) -} - -// bulkHash4 will compute hashes using the same -// algorithm as hash4 -func bulkHash4(b []byte, dst []uint32) { - if len(b) < 4 { - return - } - hb := le.Load32(b, 0) - - dst[0] = hash4u(hb, hashBits) - end := len(b) - 4 + 1 - for i := 1; i < end; i++ { - hb = (hb >> 8) | uint32(b[i+3])<<24 - dst[i] = hash4u(hb, hashBits) - } -} - -func (d *compressor) initDeflate() { - d.window = make([]byte, 2*windowSize) - d.byteAvailable = false - d.err = nil - if d.state == nil { - return - } - s := d.state - s.index = 0 - s.hashOffset = 1 - s.length = minMatchLength - 1 - s.offset = 0 - s.chainHead = -1 -} - -// deflateLazy is the same as deflate, but with d.fastSkipHashing == skipNever, -// meaning it always has lazy matching on. -func (d *compressor) deflateLazy() { - s := d.state - // Sanity enables additional runtime tests. - // It's intended to be used during development - // to supplement the currently ad-hoc unit tests. - const sanity = debugDeflate - - if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync { - return - } - if d.windowEnd != s.index && d.chain > 100 { - // Get literal huffman coder. 
- if d.h == nil { - d.h = newHuffmanEncoder(maxFlateBlockTokens) - } - var tmp [256]uint16 - toIndex := d.window[s.index:d.windowEnd] - toIndex = toIndex[:min(len(toIndex), maxFlateBlockTokens)] - for _, v := range toIndex { - tmp[v]++ - } - d.h.generate(tmp[:], 15) - } - - s.maxInsertIndex = d.windowEnd - (minMatchLength - 1) - - for { - if sanity && s.index > d.windowEnd { - panic("index > windowEnd") - } - lookahead := d.windowEnd - s.index - if lookahead < minMatchLength+maxMatchLength { - if !d.sync { - return - } - if sanity && s.index > d.windowEnd { - panic("index > windowEnd") - } - if lookahead == 0 { - // Flush current output block if any. - if d.byteAvailable { - // There is still one pending token that needs to be flushed - d.tokens.AddLiteral(d.window[s.index-1]) - d.byteAvailable = false - } - if d.tokens.n > 0 { - if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { - return - } - d.tokens.Reset() - } - return - } - } - if s.index < s.maxInsertIndex { - // Update the hash - hash := hash4(d.window[s.index:]) - ch := s.hashHead[hash] - s.chainHead = int(ch) - s.hashPrev[s.index&windowMask] = ch - s.hashHead[hash] = uint32(s.index + s.hashOffset) - } - prevLength := s.length - prevOffset := s.offset - s.length = minMatchLength - 1 - s.offset = 0 - minIndex := max(s.index-windowSize, 0) - - if s.chainHead-s.hashOffset >= minIndex && lookahead > prevLength && prevLength < d.lazy { - if newLength, newOffset, ok := d.findMatch(s.index, s.chainHead-s.hashOffset, lookahead); ok { - s.length = newLength - s.offset = newOffset - } - } - - if prevLength >= minMatchLength && s.length <= prevLength { - // No better match, but check for better match at end... - // - // Skip forward a number of bytes. - // Offset of 2 seems to yield best results. 3 is sometimes better. 
- const checkOff = 2 - - // Check all, except full length - if prevLength < maxMatchLength-checkOff { - prevIndex := s.index - 1 - if prevIndex+prevLength < s.maxInsertIndex { - end := min(lookahead, maxMatchLength+checkOff) - end += prevIndex - - // Hash at match end. - h := hash4(d.window[prevIndex+prevLength:]) - ch2 := int(s.hashHead[h]) - s.hashOffset - prevLength - if prevIndex-ch2 != prevOffset && ch2 > minIndex+checkOff { - length := matchLen(d.window[prevIndex+checkOff:end], d.window[ch2+checkOff:]) - // It seems like a pure length metric is best. - if length > prevLength { - prevLength = length - prevOffset = prevIndex - ch2 - - // Extend back... - for i := checkOff - 1; i >= 0; i-- { - if prevLength >= maxMatchLength || d.window[prevIndex+i] != d.window[ch2+i] { - // Emit tokens we "owe" - for j := 0; j <= i; j++ { - d.tokens.AddLiteral(d.window[prevIndex+j]) - if d.tokens.n == maxFlateBlockTokens { - // The block includes the current character - if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { - return - } - d.tokens.Reset() - } - s.index++ - if s.index < s.maxInsertIndex { - h := hash4(d.window[s.index:]) - ch := s.hashHead[h] - s.chainHead = int(ch) - s.hashPrev[s.index&windowMask] = ch - s.hashHead[h] = uint32(s.index + s.hashOffset) - } - } - break - } else { - prevLength++ - } - } - } else if false { - // Check one further ahead. - // Only rarely better, disabled for now. - prevIndex++ - h := hash4(d.window[prevIndex+prevLength:]) - ch2 := int(s.hashHead[h]) - s.hashOffset - prevLength - if prevIndex-ch2 != prevOffset && ch2 > minIndex+checkOff { - length := matchLen(d.window[prevIndex+checkOff:end], d.window[ch2+checkOff:]) - // It seems like a pure length metric is best. - if length > prevLength+checkOff { - prevLength = length - prevOffset = prevIndex - ch2 - prevIndex-- - - // Extend back... 
- for i := checkOff; i >= 0; i-- { - if prevLength >= maxMatchLength || d.window[prevIndex+i] != d.window[ch2+i-1] { - // Emit tokens we "owe" - for j := 0; j <= i; j++ { - d.tokens.AddLiteral(d.window[prevIndex+j]) - if d.tokens.n == maxFlateBlockTokens { - // The block includes the current character - if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { - return - } - d.tokens.Reset() - } - s.index++ - if s.index < s.maxInsertIndex { - h := hash4(d.window[s.index:]) - ch := s.hashHead[h] - s.chainHead = int(ch) - s.hashPrev[s.index&windowMask] = ch - s.hashHead[h] = uint32(s.index + s.hashOffset) - } - } - break - } else { - prevLength++ - } - } - } - } - } - } - } - } - // There was a match at the previous step, and the current match is - // not better. Output the previous match. - d.tokens.AddMatch(uint32(prevLength-3), uint32(prevOffset-minOffsetSize)) - - // Insert in the hash table all strings up to the end of the match. - // index and index-1 are already inserted. If there is not enough - // lookahead, the last two strings are not inserted into the hash - // table. - newIndex := s.index + prevLength - 1 - // Calculate missing hashes - end := min(newIndex, s.maxInsertIndex) - end += minMatchLength - 1 - startindex := min(s.index+1, s.maxInsertIndex) - tocheck := d.window[startindex:end] - dstSize := len(tocheck) - minMatchLength + 1 - if dstSize > 0 { - dst := s.hashMatch[:dstSize] - bulkHash4(tocheck, dst) - var newH uint32 - for i, val := range dst { - di := i + startindex - newH = val & hashMask - // Get previous value with the same hash. - // Our chain should point to the previous value. - s.hashPrev[di&windowMask] = s.hashHead[newH] - // Set the head of the hash chain to us. 
- s.hashHead[newH] = uint32(di + s.hashOffset) - } - } - - s.index = newIndex - d.byteAvailable = false - s.length = minMatchLength - 1 - if d.tokens.n == maxFlateBlockTokens { - // The block includes the current character - if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { - return - } - d.tokens.Reset() - } - s.ii = 0 - } else { - // Reset, if we got a match this run. - if s.length >= minMatchLength { - s.ii = 0 - } - // We have a byte waiting. Emit it. - if d.byteAvailable { - s.ii++ - d.tokens.AddLiteral(d.window[s.index-1]) - if d.tokens.n == maxFlateBlockTokens { - if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { - return - } - d.tokens.Reset() - } - s.index++ - - // If we have a long run of no matches, skip additional bytes - // Resets when s.ii overflows after 64KB. - if n := int(s.ii) - d.chain; n > 0 { - n = 1 + int(n>>6) - for j := 0; j < n; j++ { - if s.index >= d.windowEnd-1 { - break - } - d.tokens.AddLiteral(d.window[s.index-1]) - if d.tokens.n == maxFlateBlockTokens { - if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { - return - } - d.tokens.Reset() - } - // Index... 
- if s.index < s.maxInsertIndex { - h := hash4(d.window[s.index:]) - ch := s.hashHead[h] - s.chainHead = int(ch) - s.hashPrev[s.index&windowMask] = ch - s.hashHead[h] = uint32(s.index + s.hashOffset) - } - s.index++ - } - // Flush last byte - d.tokens.AddLiteral(d.window[s.index-1]) - d.byteAvailable = false - // s.length = minMatchLength - 1 // not needed, since s.ii is reset above, so it should never be > minMatchLength - if d.tokens.n == maxFlateBlockTokens { - if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { - return - } - d.tokens.Reset() - } - } - } else { - s.index++ - d.byteAvailable = true - } - } - } -} - -func (d *compressor) store() { - if d.windowEnd > 0 && (d.windowEnd == maxStoreBlockSize || d.sync) { - d.err = d.writeStoredBlock(d.window[:d.windowEnd]) - d.windowEnd = 0 - } -} - -// fillWindow will fill the buffer with data for huffman-only compression. -// The number of bytes copied is returned. -func (d *compressor) fillBlock(b []byte) int { - n := copy(d.window[d.windowEnd:], b) - d.windowEnd += n - return n -} - -// storeHuff will compress and store the currently added data, -// if enough has been accumulated or we at the end of the stream. -// Any error that occurred will be in d.err -func (d *compressor) storeHuff() { - if d.windowEnd < len(d.window) && !d.sync || d.windowEnd == 0 { - return - } - d.w.writeBlockHuff(false, d.window[:d.windowEnd], d.sync) - d.err = d.w.err - d.windowEnd = 0 -} - -// storeFast will compress and store the currently added data, -// if enough has been accumulated or we at the end of the stream. -// Any error that occurred will be in d.err -func (d *compressor) storeFast() { - // We only compress if we have maxStoreBlockSize. - if d.windowEnd < len(d.window) { - if !d.sync { - return - } - // Handle extremely small sizes. 
- if d.windowEnd < 128 { - if d.windowEnd == 0 { - return - } - if d.windowEnd <= 32 { - d.err = d.writeStoredBlock(d.window[:d.windowEnd]) - } else { - d.w.writeBlockHuff(false, d.window[:d.windowEnd], true) - d.err = d.w.err - } - d.tokens.Reset() - d.windowEnd = 0 - d.fast.Reset() - return - } - } - - d.fast.Encode(&d.tokens, d.window[:d.windowEnd]) - // If we made zero matches, store the block as is. - if d.tokens.n == 0 { - d.err = d.writeStoredBlock(d.window[:d.windowEnd]) - // If we removed less than 1/16th, huffman compress the block. - } else if int(d.tokens.n) > d.windowEnd-(d.windowEnd>>4) { - d.w.writeBlockHuff(false, d.window[:d.windowEnd], d.sync) - d.err = d.w.err - } else { - d.w.writeBlockDynamic(&d.tokens, false, d.window[:d.windowEnd], d.sync) - d.err = d.w.err - } - d.tokens.Reset() - d.windowEnd = 0 -} - -// write will add input byte to the stream. -// Unless an error occurs all bytes will be consumed. -func (d *compressor) write(b []byte) (n int, err error) { - if d.err != nil { - return 0, d.err - } - n = len(b) - for len(b) > 0 { - if d.windowEnd == len(d.window) || d.sync { - d.step(d) - } - b = b[d.fill(d, b):] - if d.err != nil { - return 0, d.err - } - } - return n, d.err -} - -func (d *compressor) syncFlush() error { - d.sync = true - if d.err != nil { - return d.err - } - d.step(d) - if d.err == nil { - d.w.writeStoredHeader(0, false) - d.w.flush() - d.err = d.w.err - } - d.sync = false - return d.err -} - -func (d *compressor) init(w io.Writer, level int) (err error) { - d.w = newHuffmanBitWriter(w) - - switch { - case level == NoCompression: - d.window = make([]byte, maxStoreBlockSize) - d.fill = (*compressor).fillBlock - d.step = (*compressor).store - case level == ConstantCompression: - d.w.logNewTablePenalty = 10 - d.window = make([]byte, 32<<10) - d.fill = (*compressor).fillBlock - d.step = (*compressor).storeHuff - case level == DefaultCompression: - level = 5 - fallthrough - case level >= 1 && level <= 6: - 
d.w.logNewTablePenalty = 7 - d.fast = newFastEnc(level) - d.window = make([]byte, maxStoreBlockSize) - d.fill = (*compressor).fillBlock - d.step = (*compressor).storeFast - case 7 <= level && level <= 9: - d.w.logNewTablePenalty = 8 - d.state = &advancedState{} - d.compressionLevel = levels[level] - d.initDeflate() - d.fill = (*compressor).fillDeflate - d.step = (*compressor).deflateLazy - case -level >= MinCustomWindowSize && -level <= MaxCustomWindowSize: - d.w.logNewTablePenalty = 7 - d.fast = &fastEncL5Window{maxOffset: int32(-level), cur: maxStoreBlockSize} - d.window = make([]byte, maxStoreBlockSize) - d.fill = (*compressor).fillBlock - d.step = (*compressor).storeFast - default: - return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level) - } - d.level = level - return nil -} - -// reset the state of the compressor. -func (d *compressor) reset(w io.Writer) { - d.w.reset(w) - d.sync = false - d.err = nil - // We only need to reset a few things for Snappy. - if d.fast != nil { - d.fast.Reset() - d.windowEnd = 0 - d.tokens.Reset() - return - } - switch d.compressionLevel.chain { - case 0: - // level was NoCompression or ConstantCompression. - d.windowEnd = 0 - default: - s := d.state - s.chainHead = -1 - for i := range s.hashHead { - s.hashHead[i] = 0 - } - for i := range s.hashPrev { - s.hashPrev[i] = 0 - } - s.hashOffset = 1 - s.index, d.windowEnd = 0, 0 - d.blockStart, d.byteAvailable = 0, false - d.tokens.Reset() - s.length = minMatchLength - 1 - s.offset = 0 - s.ii = 0 - s.maxInsertIndex = 0 - } -} - -func (d *compressor) close() error { - if d.err != nil { - return d.err - } - d.sync = true - d.step(d) - if d.err != nil { - return d.err - } - if d.w.writeStoredHeader(0, true); d.w.err != nil { - return d.w.err - } - d.w.flush() - d.w.reset(nil) - return d.w.err -} - -// NewWriter returns a new Writer compressing data at the given level. 
-// Following zlib, levels range from 1 (BestSpeed) to 9 (BestCompression); -// higher levels typically run slower but compress more. -// Level 0 (NoCompression) does not attempt any compression; it only adds the -// necessary DEFLATE framing. -// Level -1 (DefaultCompression) uses the default compression level. -// Level -2 (ConstantCompression) will use Huffman compression only, giving -// a very fast compression for all types of input, but sacrificing considerable -// compression efficiency. -// -// If level is in the range [-2, 9] then the error returned will be nil. -// Otherwise the error returned will be non-nil. -func NewWriter(w io.Writer, level int) (*Writer, error) { - var dw Writer - if err := dw.d.init(w, level); err != nil { - return nil, err - } - return &dw, nil -} - -// NewWriterDict is like NewWriter but initializes the new -// Writer with a preset dictionary. The returned Writer behaves -// as if the dictionary had been written to it without producing -// any compressed output. The compressed data written to w -// can only be decompressed by a Reader initialized with the -// same dictionary. -func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) { - zw, err := NewWriter(w, level) - if err != nil { - return nil, err - } - zw.d.fillWindow(dict) - zw.dict = append(zw.dict, dict...) // duplicate dictionary for Reset method. - return zw, err -} - -// MinCustomWindowSize is the minimum window size that can be sent to NewWriterWindow. -const MinCustomWindowSize = 32 - -// MaxCustomWindowSize is the maximum custom window that can be sent to NewWriterWindow. -const MaxCustomWindowSize = windowSize - -// NewWriterWindow returns a new Writer compressing data with a custom window size. -// windowSize must be from MinCustomWindowSize to MaxCustomWindowSize. 
-func NewWriterWindow(w io.Writer, windowSize int) (*Writer, error) { - if windowSize < MinCustomWindowSize { - return nil, errors.New("flate: requested window size less than MinWindowSize") - } - if windowSize > MaxCustomWindowSize { - return nil, errors.New("flate: requested window size bigger than MaxCustomWindowSize") - } - var dw Writer - if err := dw.d.init(w, -windowSize); err != nil { - return nil, err - } - return &dw, nil -} - -// A Writer takes data written to it and writes the compressed -// form of that data to an underlying writer (see NewWriter). -type Writer struct { - d compressor - dict []byte -} - -// Write writes data to w, which will eventually write the -// compressed form of data to its underlying writer. -func (w *Writer) Write(data []byte) (n int, err error) { - return w.d.write(data) -} - -// Flush flushes any pending data to the underlying writer. -// It is useful mainly in compressed network protocols, to ensure that -// a remote reader has enough data to reconstruct a packet. -// Flush does not return until the data has been written. -// Calling Flush when there is no pending data still causes the Writer -// to emit a sync marker of at least 4 bytes. -// If the underlying writer returns an error, Flush returns that error. -// -// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH. -func (w *Writer) Flush() error { - // For more about flushing: - // http://www.bolet.org/~pornin/deflate-flush.html - return w.d.syncFlush() -} - -// Close flushes and closes the writer. -func (w *Writer) Close() error { - return w.d.close() -} - -// Reset discards the writer's state and makes it equivalent to -// the result of NewWriter or NewWriterDict called with dst -// and w's level and dictionary. 
-func (w *Writer) Reset(dst io.Writer) { - if len(w.dict) > 0 { - // w was created with NewWriterDict - w.d.reset(dst) - if dst != nil { - w.d.fillWindow(w.dict) - } - } else { - // w was created with NewWriter - w.d.reset(dst) - } -} - -// ResetDict discards the writer's state and makes it equivalent to -// the result of NewWriter or NewWriterDict called with dst -// and w's level, but sets a specific dictionary. -func (w *Writer) ResetDict(dst io.Writer, dict []byte) { - w.dict = dict - w.d.reset(dst) - w.d.fillWindow(w.dict) -} diff --git a/internal/compress/flate/deflate_test.go b/internal/compress/flate/deflate_test.go deleted file mode 100644 index 9ac3da1f..00000000 --- a/internal/compress/flate/deflate_test.go +++ /dev/null @@ -1,708 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Copyright (c) 2015 Klaus Post -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package flate - -import ( - "bytes" - "fmt" - "io" - "os" - "reflect" - "strings" - "sync" - "testing" -) - -type deflateTest struct { - in []byte - level int - out []byte -} - -type deflateInflateTest struct { - in []byte -} - -type reverseBitsTest struct { - in uint16 - bitCount uint8 - out uint16 -} - -var deflateTests = []*deflateTest{ - 0: {[]byte{}, 0, []byte{0x3, 0x0}}, - 1: {[]byte{0x11}, BestCompression, []byte{0x12, 0x4, 0xc, 0x0}}, - 2: {[]byte{0x11}, BestCompression, []byte{0x12, 0x4, 0xc, 0x0}}, - 3: {[]byte{0x11}, BestCompression, []byte{0x12, 0x4, 0xc, 0x0}}, - - 4: {[]byte{0x11}, 0, []byte{0x0, 0x1, 0x0, 0xfe, 0xff, 0x11, 0x3, 0x0}}, - 5: {[]byte{0x11, 0x12}, 0, []byte{0x0, 0x2, 0x0, 0xfd, 0xff, 0x11, 0x12, 0x3, 0x0}}, - 6: { - []byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, - 0, - []byte{0x0, 0x8, 0x0, 0xf7, 0xff, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x3, 0x0}, - }, - 7: {[]byte{}, 1, []byte{0x3, 0x0}}, - 8: {[]byte{0x11}, BestCompression, []byte{0x12, 0x4, 0xc, 0x0}}, - 9: {[]byte{0x11, 
0x12}, BestCompression, []byte{0x12, 0x14, 0x2, 0xc, 0x0}}, - 10: {[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, BestCompression, []byte{0x12, 0x84, 0x1, 0xc0, 0x0}}, - 11: {[]byte{}, 9, []byte{0x3, 0x0}}, - 12: {[]byte{0x11}, 9, []byte{0x12, 0x4, 0xc, 0x0}}, - 13: {[]byte{0x11, 0x12}, 9, []byte{0x12, 0x14, 0x2, 0xc, 0x0}}, - 14: {[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, 9, []byte{0x12, 0x84, 0x1, 0xc0, 0x0}}, -} - -var deflateInflateTests = []*deflateInflateTest{ - {[]byte{}}, - {[]byte{0x11}}, - {[]byte{0x11, 0x12}}, - {[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}}, - {[]byte{0x11, 0x10, 0x13, 0x41, 0x21, 0x21, 0x41, 0x13, 0x87, 0x78, 0x13}}, - {largeDataChunk()}, -} - -var reverseBitsTests = []*reverseBitsTest{ - {1, 1, 1}, - {1, 2, 2}, - {1, 3, 4}, - {1, 4, 8}, - {1, 5, 16}, - {17, 5, 17}, - {257, 9, 257}, - {29, 5, 23}, -} - -func largeDataChunk() []byte { - result := make([]byte, 100000) - for i := range result { - result[i] = byte(i * i & 0xFF) - } - return result -} - -func TestBulkHash4(t *testing.T) { - for _, x := range deflateTests { - y := x.out - if len(y) >= minMatchLength { - y = append(y, y...) 
- for j := 4; j < len(y); j++ { - y := y[:j] - dst := make([]uint32, len(y)-minMatchLength+1) - for i := range dst { - dst[i] = uint32(i + 100) - } - bulkHash4(y, dst) - for i, val := range dst { - got := val - expect := hash4(y[i:]) - if got != expect && got == uint32(i)+100 { - t.Errorf("Len:%d Index:%d, expected 0x%08x but not modified", len(y), i, expect) - } else if got != expect { - t.Errorf("Len:%d Index:%d, got 0x%08x expected:0x%08x", len(y), i, got, expect) - } else { - // t.Logf("Len:%d Index:%d OK (0x%08x)", len(y), i, got) - } - } - } - } - } -} - -func TestDeflate(t *testing.T) { - for i, h := range deflateTests { - var buf bytes.Buffer - w, err := NewWriter(&buf, h.level) - if err != nil { - t.Errorf("NewWriter: %v", err) - continue - } - w.Write(h.in) - w.Close() - if !bytes.Equal(buf.Bytes(), h.out) { - t.Errorf("%d: Deflate(%d, %x) got \n%#v, want \n%#v", i, h.level, h.in, buf.Bytes(), h.out) - } - } -} - -// A sparseReader returns a stream consisting of 0s followed by 1<<16 1s. -// This tests missing hash references in a very large input. 
-type sparseReader struct { - l int64 - cur int64 -} - -func (r *sparseReader) Read(b []byte) (n int, err error) { - if r.cur >= r.l { - return 0, io.EOF - } - n = len(b) - cur := r.cur + int64(n) - if cur > r.l { - n -= int(cur - r.l) - cur = r.l - } - for i := range b[0:n] { - if r.cur+int64(i) >= r.l-1<<16 { - b[i] = 1 - } else { - b[i] = 0 - } - } - r.cur = cur - return -} - -func TestVeryLongSparseChunk(t *testing.T) { - if testing.Short() { - t.Skip("skipping sparse chunk during short test") - } - var buf bytes.Buffer - w, err := NewWriter(&buf, 1) - if err != nil { - t.Errorf("NewWriter: %v", err) - return - } - if _, err = io.Copy(w, &sparseReader{l: 23e8}); err != nil { - t.Errorf("Compress failed: %v", err) - return - } - t.Log("Length:", buf.Len()) -} - -func TestOneMByte(t *testing.T) { - var input [1024 * 1024]byte - - var compressedOutput bytes.Buffer - for level := HuffmanOnly; level <= BestCompression; level++ { - compressedOutput.Reset() - compressor, err := NewWriter(&compressedOutput, level) - if err != nil { - t.Fatalf("create: %s", err) - } - // Use single write... 
- if _, err := compressor.Write(input[:]); err != nil { - t.Fatalf("compress: %s", err) - } - - if err := compressor.Close(); err != nil { - t.Fatalf("close: %s", err) - } - - var decompressedOutput bytes.Buffer - - decompresser := NewReader(&compressedOutput) - t.Log("level:", level, "compressed:", compressedOutput.Len()) - if _, err := io.Copy(&decompressedOutput, decompresser); err != nil { - t.Fatalf("decompress: %s", err) - } - - if !bytes.Equal(input[:], decompressedOutput.Bytes()) { - t.Fatal("input and output do not match") - } - } -} - -type syncBuffer struct { - buf bytes.Buffer - mu sync.RWMutex - closed bool - ready chan bool -} - -func newSyncBuffer() *syncBuffer { - return &syncBuffer{ready: make(chan bool, 1)} -} - -func (b *syncBuffer) Read(p []byte) (n int, err error) { - for { - b.mu.RLock() - n, err = b.buf.Read(p) - b.mu.RUnlock() - if n > 0 || b.closed { - return - } - <-b.ready - } -} - -func (b *syncBuffer) signal() { - select { - case b.ready <- true: - default: - } -} - -func (b *syncBuffer) Write(p []byte) (n int, err error) { - n, err = b.buf.Write(p) - b.signal() - return -} - -func (b *syncBuffer) WriteMode() { - b.mu.Lock() -} - -func (b *syncBuffer) ReadMode() { - b.mu.Unlock() - b.signal() -} - -func (b *syncBuffer) Close() error { - b.closed = true - b.signal() - return nil -} - -func testSync(t *testing.T, level int, input []byte, name string) { - if len(input) == 0 { - return - } - - t.Logf("--testSync %d, %d, %s", level, len(input), name) - buf := newSyncBuffer() - buf1 := new(bytes.Buffer) - buf.WriteMode() - w, err := NewWriter(io.MultiWriter(buf, buf1), level) - if err != nil { - t.Errorf("NewWriter: %v", err) - return - } - r := NewReader(buf) - - // Write half the input and read back. 
- for i := range 2 { - var lo, hi int - if i == 0 { - lo, hi = 0, (len(input)+1)/2 - } else { - lo, hi = (len(input)+1)/2, len(input) - } - t.Logf("#%d: write %d-%d", i, lo, hi) - if _, err := w.Write(input[lo:hi]); err != nil { - t.Errorf("testSync: write: %v", err) - return - } - if i == 0 { - if err := w.Flush(); err != nil { - t.Errorf("testSync: flush: %v", err) - return - } - } else { - if err := w.Close(); err != nil { - t.Errorf("testSync: close: %v", err) - } - } - buf.ReadMode() - out := make([]byte, hi-lo+1) - m, err := io.ReadAtLeast(r, out, hi-lo) - t.Logf("#%d: read %d", i, m) - if m != hi-lo || err != nil { - t.Errorf("testSync/%d (%d, %d, %s): read %d: %d, %v (%d left)", i, level, len(input), name, hi-lo, m, err, buf.buf.Len()) - return - } - if !bytes.Equal(input[lo:hi], out[:hi-lo]) { - t.Errorf("testSync/%d: read wrong bytes: %x vs %x", i, input[lo:hi], out[:hi-lo]) - return - } - // This test originally checked that after reading - // the first half of the input, there was nothing left - // in the read buffer (buf.buf.Len() != 0) but that is - // not necessarily the case: the write Flush may emit - // some extra framing bits that are not necessary - // to process to obtain the first half of the uncompressed - // data. The test ran correctly most of the time, because - // the background goroutine had usually read even - // those extra bits by now, but it's not a useful thing to - // check. 
- buf.WriteMode() - } - buf.ReadMode() - out := make([]byte, 10) - if n, err := r.Read(out); n > 0 || err != io.EOF { - t.Errorf("testSync (%d, %d, %s): final Read: %d, %v (hex: %x)", level, len(input), name, n, err, out[0:n]) - } - if buf.buf.Len() != 0 { - t.Errorf("testSync (%d, %d, %s): extra data at end", level, len(input), name) - } - r.Close() - - // stream should work for ordinary reader too - r = NewReader(buf1) - out, err = io.ReadAll(r) - if err != nil { - t.Errorf("testSync: read: %s", err) - return - } - r.Close() - if !bytes.Equal(input, out) { - t.Errorf("testSync: decompress(compress(data)) != data: level=%d input=%s", level, name) - } -} - -func testToFromWithLevelAndLimit(t *testing.T, level int, input []byte, name string, limit int) { - var buffer bytes.Buffer - w, err := NewWriter(&buffer, level) - if err != nil { - t.Errorf("NewWriter: %v", err) - return - } - w.Write(input) - w.Close() - if limit > 0 { - t.Logf("level: %d - Size:%.2f%%, %d b\n", level, float64(buffer.Len()*100)/float64(limit), buffer.Len()) - } - if limit > 0 && buffer.Len() > limit { - t.Errorf("level: %d, len(compress(data)) = %d > limit = %d", level, buffer.Len(), limit) - } - - r := NewReader(&buffer) - out, err := io.ReadAll(r) - if err != nil { - t.Errorf("read: %s", err) - return - } - r.Close() - if !bytes.Equal(input, out) { - os.WriteFile("testdata/fails/"+t.Name()+".got", out, os.ModePerm) - os.WriteFile("testdata/fails/"+t.Name()+".want", input, os.ModePerm) - t.Errorf("decompress(compress(data)) != data: level=%d input=%s", level, name) - return - } - testSync(t, level, input, name) -} - -func testToFromWithLimit(t *testing.T, input []byte, name string, limit [11]int) { - for i := range 10 { - testToFromWithLevelAndLimit(t, i, input, name, limit[i]) - } - testToFromWithLevelAndLimit(t, -2, input, name, limit[10]) -} - -func TestDeflateInflate(t *testing.T) { - for i, h := range deflateInflateTests { - testToFromWithLimit(t, h.in, fmt.Sprintf("#%d", i), [11]int{}) 
- } -} - -func TestReverseBits(t *testing.T) { - for _, h := range reverseBitsTests { - if v := reverseBits(h.in, h.bitCount); v != h.out { - t.Errorf("reverseBits(%v,%v) = %v, want %v", - h.in, h.bitCount, v, h.out) - } - } -} - -type deflateInflateStringTest struct { - filename string - label string - limit [11]int // Number 11 is ConstantCompression -} - -var deflateInflateStringTests = []deflateInflateStringTest{ - { - "../testdata/e.txt", - "2.718281828...", - [...]int{100018, 67900, 50960, 51150, 50930, 50790, 50790, 50790, 50790, 50790, 43683 + 100}, - }, - { - "../testdata/Mark.Twain-Tom.Sawyer.txt", - "Mark.Twain-Tom.Sawyer", - [...]int{387999, 185000, 182361, 179974, 174124, 168819, 162936, 160506, 160295, 160295, 233460 + 100}, - }, -} - -func TestDeflateInflateString(t *testing.T) { - for _, test := range deflateInflateStringTests { - gold, err := os.ReadFile(test.filename) - if err != nil { - t.Error(err) - } - // Remove returns that may be present on Windows - neutral := strings.Map(func(r rune) rune { - if r != '\r' { - return r - } - return -1 - }, string(gold)) - - testToFromWithLimit(t, []byte(neutral), test.label, test.limit) - - if testing.Short() { - break - } - } -} - -func TestReaderDict(t *testing.T) { - const ( - dict = "hello world" - text = "hello again world" - ) - var b bytes.Buffer - w, err := NewWriter(&b, 5) - if err != nil { - t.Fatalf("NewWriter: %v", err) - } - w.Write([]byte(dict)) - w.Flush() - b.Reset() - w.Write([]byte(text)) - w.Close() - - r := NewReaderDict(&b, []byte(dict)) - data, err := io.ReadAll(r) - if err != nil { - t.Fatal(err) - } - if string(data) != "hello again world" { - t.Fatalf("read returned %q want %q", string(data), text) - } -} - -func TestWriterDict(t *testing.T) { - const ( - dict = "hello world Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua." 
- text = "hello world Lorem ipsum dolor sit amet" - ) - // This test is sensitive to algorithm changes that skip - // data in favour of speed. Higher levels are less prone to this - // so we test level 4-9. - for l := 4; l < 9; l++ { - var b bytes.Buffer - w, err := NewWriter(&b, l) - if err != nil { - t.Fatalf("level %d, NewWriter: %v", l, err) - } - w.Write([]byte(dict)) - w.Flush() - b.Reset() - w.Write([]byte(text)) - w.Close() - - var b1 bytes.Buffer - w, _ = NewWriterDict(&b1, l, []byte(dict)) - w.Write([]byte(text)) - w.Close() - - if !bytes.Equal(b1.Bytes(), b.Bytes()) { - t.Errorf("level %d, writer wrote\n%v\n want\n%v", l, b1.Bytes(), b.Bytes()) - } - } -} - -// See http://code.google.com/p/go/issues/detail?id=2508 -func TestRegression2508(t *testing.T) { - if testing.Short() { - t.Logf("test disabled with -short") - return - } - w, err := NewWriter(io.Discard, 1) - if err != nil { - t.Fatalf("NewWriter: %v", err) - } - buf := make([]byte, 1024) - for range 131072 { - if _, err := w.Write(buf); err != nil { - t.Fatalf("writer failed: %v", err) - } - } - w.Close() -} - -func TestWriterReset(t *testing.T) { - for level := -2; level <= 9; level++ { - if level == -1 { - level++ - } - if testing.Short() && level > 1 { - break - } - w, err := NewWriter(io.Discard, level) - if err != nil { - t.Fatalf("NewWriter: %v", err) - } - buf := []byte("hello world") - for range 1024 { - w.Write(buf) - } - w.Reset(io.Discard) - - wref, err := NewWriter(io.Discard, level) - if err != nil { - t.Fatalf("NewWriter: %v", err) - } - - // DeepEqual doesn't compare functions. - w.d.fill, wref.d.fill = nil, nil - w.d.step, wref.d.step = nil, nil - w.d.state, wref.d.state = nil, nil - w.d.fast, wref.d.fast = nil, nil - - // hashMatch is always overwritten when used. - if w.d.tokens.n != 0 { - t.Errorf("level %d Writer not reset after Reset. %d tokens were present", level, w.d.tokens.n) - } - // As long as the length is 0, we don't care about the content. 
- w.d.tokens = wref.d.tokens - - // We don't care if there are values in the window, as long as it is at d.index is 0 - w.d.window = wref.d.window - if !reflect.DeepEqual(w, wref) { - t.Errorf("level %d Writer not reset after Reset", level) - } - } - - for i := HuffmanOnly; i <= BestCompression; i++ { - testResetOutput(t, fmt.Sprint("level-", i), func(w io.Writer) (*Writer, error) { return NewWriter(w, i) }) - } - dict := []byte(strings.Repeat("we are the world - how are you?", 3)) - for i := HuffmanOnly; i <= BestCompression; i++ { - testResetOutput(t, fmt.Sprint("dict-level-", i), func(w io.Writer) (*Writer, error) { return NewWriterDict(w, i, dict) }) - } - for i := HuffmanOnly; i <= BestCompression; i++ { - testResetOutput(t, fmt.Sprint("dict-reset-level-", i), func(w io.Writer) (*Writer, error) { - w2, err := NewWriter(nil, i) - if err != nil { - return w2, err - } - w2.ResetDict(w, dict) - return w2, nil - }) - } - testResetOutput(t, fmt.Sprint("dict-reset-window"), func(w io.Writer) (*Writer, error) { - w2, err := NewWriterWindow(nil, 1024) - if err != nil { - return w2, err - } - w2.ResetDict(w, dict) - return w2, nil - }) -} - -func testResetOutput(t *testing.T, name string, newWriter func(w io.Writer) (*Writer, error)) { - t.Run(name, func(t *testing.T) { - buf := new(bytes.Buffer) - w, err := newWriter(buf) - if err != nil { - t.Fatalf("NewWriter: %v", err) - } - b := []byte("hello world - how are you doing?") - for range 1024 { - w.Write(b) - } - w.Close() - out1 := buf.Bytes() - - buf2 := new(bytes.Buffer) - w.Reset(buf2) - for range 1024 { - w.Write(b) - } - w.Close() - out2 := buf2.Bytes() - - if len(out1) != len(out2) { - t.Errorf("got %d, expected %d bytes", len(out2), len(out1)) - } - if !bytes.Equal(out1, out2) { - mm := 0 - for i, b := range out1[:len(out2)] { - if b != out2[i] { - t.Errorf("mismatch index %d: %02x, expected %02x", i, out2[i], b) - } - mm++ - if mm == 10 { - t.Fatal("Stopping") - } - } - } - t.Logf("got %d bytes", len(out1)) - 
}) -} - -// TestBestSpeed tests that round-tripping through deflate and then inflate -// recovers the original input. The Write sizes are near the thresholds in the -// compressor.encSpeed method (0, 16, 128), as well as near maxStoreBlockSize -// (65535). -func TestBestSpeed(t *testing.T) { - abc := make([]byte, 128) - for i := range abc { - abc[i] = byte(i) - } - abcabc := bytes.Repeat(abc, 131072/len(abc)) - var want []byte - - testCases := [][]int{ - {65536, 0}, - {65536, 1}, - {65536, 1, 256}, - {65536, 1, 65536}, - {65536, 14}, - {65536, 15}, - {65536, 16}, - {65536, 16, 256}, - {65536, 16, 65536}, - {65536, 127}, - {65536, 128}, - {65536, 128, 256}, - {65536, 128, 65536}, - {65536, 129}, - {65536, 65536, 256}, - {65536, 65536, 65536}, - } - - for i, tc := range testCases { - if testing.Short() && i > 5 { - t.Skip() - } - for _, firstN := range []int{1, 65534, 65535, 65536, 65537, 131072} { - tc[0] = firstN - outer: - for _, flush := range []bool{false, true} { - buf := new(bytes.Buffer) - want = want[:0] - - w, err := NewWriter(buf, BestSpeed) - if err != nil { - t.Errorf("i=%d, firstN=%d, flush=%t: NewWriter: %v", i, firstN, flush, err) - continue - } - for _, n := range tc { - want = append(want, abcabc[:n]...) 
- if _, err := w.Write(abcabc[:n]); err != nil { - t.Errorf("i=%d, firstN=%d, flush=%t: Write: %v", i, firstN, flush, err) - continue outer - } - if !flush { - continue - } - if err := w.Flush(); err != nil { - t.Errorf("i=%d, firstN=%d, flush=%t: Flush: %v", i, firstN, flush, err) - continue outer - } - } - if err := w.Close(); err != nil { - t.Errorf("i=%d, firstN=%d, flush=%t: Close: %v", i, firstN, flush, err) - continue - } - - r := NewReader(buf) - got, err := io.ReadAll(r) - if err != nil { - t.Errorf("i=%d, firstN=%d, flush=%t: ReadAll: %v", i, firstN, flush, err) - continue - } - r.Close() - - if !bytes.Equal(got, want) { - t.Errorf("i=%d, firstN=%d, flush=%t: corruption during deflate-then-inflate", i, firstN, flush) - continue - } - } - } - } -} diff --git a/internal/compress/flate/dict_decoder.go b/internal/compress/flate/dict_decoder.go deleted file mode 100644 index cb855abc..00000000 --- a/internal/compress/flate/dict_decoder.go +++ /dev/null @@ -1,181 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package flate - -// dictDecoder implements the LZ77 sliding dictionary as used in decompression. -// LZ77 decompresses data through sequences of two forms of commands: -// -// - Literal insertions: Runs of one or more symbols are inserted into the data -// stream as is. This is accomplished through the writeByte method for a -// single symbol, or combinations of writeSlice/writeMark for multiple symbols. -// Any valid stream must start with a literal insertion if no preset dictionary -// is used. -// -// - Backward copies: Runs of one or more symbols are copied from previously -// emitted data. Backward copies come as the tuple (dist, length) where dist -// determines how far back in the stream to copy from and length determines how -// many bytes to copy. Note that it is valid for the length to be greater than -// the distance. 
Since LZ77 uses forward copies, that situation is used to -// perform a form of run-length encoding on repeated runs of symbols. -// The writeCopy and tryWriteCopy are used to implement this command. -// -// For performance reasons, this implementation performs little to no sanity -// checks about the arguments. As such, the invariants documented for each -// method call must be respected. -type dictDecoder struct { - hist []byte // Sliding window history - - // Invariant: 0 <= rdPos <= wrPos <= len(hist) - wrPos int // Current output position in buffer - rdPos int // Have emitted hist[:rdPos] already - full bool // Has a full window length been written yet? -} - -// init initializes dictDecoder to have a sliding window dictionary of the given -// size. If a preset dict is provided, it will initialize the dictionary with -// the contents of dict. -func (dd *dictDecoder) init(size int, dict []byte) { - *dd = dictDecoder{hist: dd.hist} - - if cap(dd.hist) < size { - dd.hist = make([]byte, size) - } - dd.hist = dd.hist[:size] - - if len(dict) > len(dd.hist) { - dict = dict[len(dict)-len(dd.hist):] - } - dd.wrPos = copy(dd.hist, dict) - if dd.wrPos == len(dd.hist) { - dd.wrPos = 0 - dd.full = true - } - dd.rdPos = dd.wrPos -} - -// histSize reports the total amount of historical data in the dictionary. -func (dd *dictDecoder) histSize() int { - if dd.full { - return len(dd.hist) - } - return dd.wrPos -} - -// availRead reports the number of bytes that can be flushed by readFlush. -func (dd *dictDecoder) availRead() int { - return dd.wrPos - dd.rdPos -} - -// availWrite reports the available amount of output buffer space. -func (dd *dictDecoder) availWrite() int { - return len(dd.hist) - dd.wrPos -} - -// writeSlice returns a slice of the available buffer to write data to. -// -// This invariant will be kept: len(s) <= availWrite() -func (dd *dictDecoder) writeSlice() []byte { - return dd.hist[dd.wrPos:] -} - -// writeMark advances the writer pointer by cnt. 
-// -// This invariant must be kept: 0 <= cnt <= availWrite() -func (dd *dictDecoder) writeMark(cnt int) { - dd.wrPos += cnt -} - -// writeByte writes a single byte to the dictionary. -// -// This invariant must be kept: 0 < availWrite() -func (dd *dictDecoder) writeByte(c byte) { - dd.hist[dd.wrPos] = c - dd.wrPos++ -} - -// writeCopy copies a string at a given (dist, length) to the output. -// This returns the number of bytes copied and may be less than the requested -// length if the available space in the output buffer is too small. -// -// This invariant must be kept: 0 < dist <= histSize() -func (dd *dictDecoder) writeCopy(dist, length int) int { - dstBase := dd.wrPos - dstPos := dstBase - srcPos := dstPos - dist - endPos := min(dstPos+length, len(dd.hist)) - - // Copy non-overlapping section after destination position. - // - // This section is non-overlapping in that the copy length for this section - // is always less than or equal to the backwards distance. This can occur - // if a distance refers to data that wraps-around in the buffer. - // Thus, a backwards copy is performed here; that is, the exact bytes in - // the source prior to the copy is placed in the destination. - if srcPos < 0 { - srcPos += len(dd.hist) - dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:]) - srcPos = 0 - } - - // Copy possibly overlapping section before destination position. - // - // This section can overlap if the copy length for this section is larger - // than the backwards distance. This is allowed by LZ77 so that repeated - // strings can be succinctly represented using (dist, length) pairs. - // Thus, a forwards copy is performed here; that is, the bytes copied is - // possibly dependent on the resulting bytes in the destination as the copy - // progresses along. 
This is functionally equivalent to the following: - // - // for i := 0; i < endPos-dstPos; i++ { - // dd.hist[dstPos+i] = dd.hist[srcPos+i] - // } - // dstPos = endPos - // - for dstPos < endPos { - dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos]) - } - - dd.wrPos = dstPos - return dstPos - dstBase -} - -// tryWriteCopy tries to copy a string at a given (distance, length) to the -// output. This specialized version is optimized for short distances. -// -// This method is designed to be inlined for performance reasons. -// -// This invariant must be kept: 0 < dist <= histSize() -func (dd *dictDecoder) tryWriteCopy(dist, length int) int { - dstPos := dd.wrPos - endPos := dstPos + length - if dstPos < dist || endPos > len(dd.hist) { - return 0 - } - dstBase := dstPos - srcPos := dstPos - dist - - // Copy possibly overlapping section before destination position. -loop: - dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos]) - if dstPos < endPos { - goto loop // Avoid for-loop so that this function can be inlined - } - - dd.wrPos = dstPos - return dstPos - dstBase -} - -// readFlush returns a slice of the historical buffer that is ready to be -// emitted to the user. The data returned by readFlush must be fully consumed -// before calling any other dictDecoder methods. -func (dd *dictDecoder) readFlush() []byte { - toRead := dd.hist[dd.rdPos:dd.wrPos] - dd.rdPos = dd.wrPos - if dd.wrPos == len(dd.hist) { - dd.wrPos, dd.rdPos = 0, 0 - dd.full = true - } - return toRead -} diff --git a/internal/compress/flate/dict_decoder_test.go b/internal/compress/flate/dict_decoder_test.go deleted file mode 100644 index 8bc48a3e..00000000 --- a/internal/compress/flate/dict_decoder_test.go +++ /dev/null @@ -1,284 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package flate - -import ( - "bytes" - "strings" - "testing" -) - -func TestDictDecoder(t *testing.T) { - const ( - abc = "ABC\n" - fox = "The quick brown fox jumped over the lazy dog!\n" - poem = "The Road Not Taken\nRobert Frost\n" + - "\n" + - "Two roads diverged in a yellow wood,\n" + - "And sorry I could not travel both\n" + - "And be one traveler, long I stood\n" + - "And looked down one as far as I could\n" + - "To where it bent in the undergrowth;\n" + - "\n" + - "Then took the other, as just as fair,\n" + - "And having perhaps the better claim,\n" + - "Because it was grassy and wanted wear;\n" + - "Though as for that the passing there\n" + - "Had worn them really about the same,\n" + - "\n" + - "And both that morning equally lay\n" + - "In leaves no step had trodden black.\n" + - "Oh, I kept the first for another day!\n" + - "Yet knowing how way leads on to way,\n" + - "I doubted if I should ever come back.\n" + - "\n" + - "I shall be telling this with a sigh\n" + - "Somewhere ages and ages hence:\n" + - "Two roads diverged in a wood, and I-\n" + - "I took the one less traveled by,\n" + - "And that has made all the difference.\n" - ) - - poemRefs := []struct { - dist int // Backward distance (0 if this is an insertion) - length int // Length of copy or insertion - }{ - {0, 38}, - {33, 3}, - {0, 48}, - {79, 3}, - {0, 11}, - {34, 5}, - {0, 6}, - {23, 7}, - {0, 8}, - {50, 3}, - {0, 2}, - {69, 3}, - {34, 5}, - {0, 4}, - {97, 3}, - {0, 4}, - {43, 5}, - {0, 6}, - {7, 4}, - {88, 7}, - {0, 12}, - {80, 3}, - {0, 2}, - {141, 4}, - {0, 1}, - {196, 3}, - {0, 3}, - {157, 3}, - {0, 6}, - {181, 3}, - {0, 2}, - {23, 3}, - {77, 3}, - {28, 5}, - {128, 3}, - {110, 4}, - {70, 3}, - {0, 4}, - {85, 6}, - {0, 2}, - {182, 6}, - {0, 4}, - {133, 3}, - {0, 7}, - {47, 5}, - {0, 20}, - {112, 5}, - {0, 1}, - {58, 3}, - {0, 8}, - {59, 3}, - {0, 4}, - {173, 3}, - {0, 5}, - {114, 3}, - {0, 4}, - {92, 5}, - {0, 2}, - {71, 3}, - {0, 2}, - {76, 5}, - {0, 1}, - {46, 3}, - {96, 4}, - {130, 
4}, - {0, 3}, - {360, 3}, - {0, 3}, - {178, 5}, - {0, 7}, - {75, 3}, - {0, 3}, - {45, 6}, - {0, 6}, - {299, 6}, - {180, 3}, - {70, 6}, - {0, 1}, - {48, 3}, - {66, 4}, - {0, 3}, - {47, 5}, - {0, 9}, - {325, 3}, - {0, 1}, - {359, 3}, - {318, 3}, - {0, 2}, - {199, 3}, - {0, 1}, - {344, 3}, - {0, 3}, - {248, 3}, - {0, 10}, - {310, 3}, - {0, 3}, - {93, 6}, - {0, 3}, - {252, 3}, - {157, 4}, - {0, 2}, - {273, 5}, - {0, 14}, - {99, 4}, - {0, 1}, - {464, 4}, - {0, 2}, - {92, 4}, - {495, 3}, - {0, 1}, - {322, 4}, - {16, 4}, - {0, 3}, - {402, 3}, - {0, 2}, - {237, 4}, - {0, 2}, - {432, 4}, - {0, 1}, - {483, 5}, - {0, 2}, - {294, 4}, - {0, 2}, - {306, 3}, - {113, 5}, - {0, 1}, - {26, 4}, - {164, 3}, - {488, 4}, - {0, 1}, - {542, 3}, - {248, 6}, - {0, 5}, - {205, 3}, - {0, 8}, - {48, 3}, - {449, 6}, - {0, 2}, - {192, 3}, - {328, 4}, - {9, 5}, - {433, 3}, - {0, 3}, - {622, 25}, - {615, 5}, - {46, 5}, - {0, 2}, - {104, 3}, - {475, 10}, - {549, 3}, - {0, 4}, - {597, 8}, - {314, 3}, - {0, 1}, - {473, 6}, - {317, 5}, - {0, 1}, - {400, 3}, - {0, 3}, - {109, 3}, - {151, 3}, - {48, 4}, - {0, 4}, - {125, 3}, - {108, 3}, - {0, 2}, - } - - var got, want bytes.Buffer - var dd dictDecoder - dd.init(1<<11, nil) - - writeCopy := func(dist, length int) { - for length > 0 { - cnt := dd.tryWriteCopy(dist, length) - if cnt == 0 { - cnt = dd.writeCopy(dist, length) - } - - length -= cnt - if dd.availWrite() == 0 { - got.Write(dd.readFlush()) - } - } - } - writeString := func(str string) { - for len(str) > 0 { - cnt := copy(dd.writeSlice(), str) - str = str[cnt:] - dd.writeMark(cnt) - if dd.availWrite() == 0 { - got.Write(dd.readFlush()) - } - } - } - - writeString(".") - want.WriteByte('.') - - str := poem - for _, ref := range poemRefs { - if ref.dist == 0 { - writeString(str[:ref.length]) - } else { - writeCopy(ref.dist, ref.length) - } - str = str[ref.length:] - } - want.WriteString(poem) - - writeCopy(dd.histSize(), 33) - want.Write(want.Bytes()[:33]) - - writeString(abc) - writeCopy(len(abc), 
59*len(abc)) - want.WriteString(strings.Repeat(abc, 60)) - - writeString(fox) - writeCopy(len(fox), 9*len(fox)) - want.WriteString(strings.Repeat(fox, 10)) - - writeString(".") - writeCopy(1, 9) - want.WriteString(strings.Repeat(".", 10)) - - writeString(strings.ToUpper(poem)) - writeCopy(len(poem), 7*len(poem)) - want.WriteString(strings.Repeat(strings.ToUpper(poem), 8)) - - writeCopy(dd.histSize(), 10) - want.Write(want.Bytes()[want.Len()-dd.histSize():][:10]) - - got.Write(dd.readFlush()) - if got.String() != want.String() { - t.Errorf("final string mismatch:\ngot %q\nwant %q", got.String(), want.String()) - } -} diff --git a/internal/compress/flate/example_test.go b/internal/compress/flate/example_test.go deleted file mode 100644 index 45f44edf..00000000 --- a/internal/compress/flate/example_test.go +++ /dev/null @@ -1,240 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package flate_test - -import ( - "bytes" - "fmt" - "io" - "log" - "os" - "strings" - "sync" - - "lindenii.org/go/furgit/internal/compress/flate" -) - -// In performance critical applications, Reset can be used to discard the -// current compressor or decompressor state and reinitialize them quickly -// by taking advantage of previously allocated memory. -func Example_reset() { - proverbs := []string{ - "Don't communicate by sharing memory, share memory by communicating.\n", - "Concurrency is not parallelism.\n", - "The bigger the interface, the weaker the abstraction.\n", - "Documentation is for users.\n", - } - - var r strings.Reader - var b bytes.Buffer - buf := make([]byte, 32<<10) - - zw, err := flate.NewWriter(nil, flate.DefaultCompression) - if err != nil { - log.Fatal(err) - } - zr := flate.NewReader(nil) - - for _, s := range proverbs { - r.Reset(s) - b.Reset() - - // Reset the compressor and encode from some input stream. 
- zw.Reset(&b) - if _, err := io.CopyBuffer(zw, &r, buf); err != nil { - log.Fatal(err) - } - if err := zw.Close(); err != nil { - log.Fatal(err) - } - - // Reset the decompressor and decode to some output stream. - if err := zr.(flate.Resetter).Reset(&b, nil); err != nil { - log.Fatal(err) - } - if _, err := io.CopyBuffer(os.Stdout, zr, buf); err != nil { - log.Fatal(err) - } - if err := zr.Close(); err != nil { - log.Fatal(err) - } - } - - // Output: - // Don't communicate by sharing memory, share memory by communicating. - // Concurrency is not parallelism. - // The bigger the interface, the weaker the abstraction. - // Documentation is for users. -} - -// A preset dictionary can be used to improve the compression ratio. -// The downside to using a dictionary is that the compressor and decompressor -// must agree in advance what dictionary to use. -func Example_dictionary() { - // The dictionary is a string of bytes. When compressing some input data, - // the compressor will attempt to substitute substrings with matches found - // in the dictionary. As such, the dictionary should only contain substrings - // that are expected to be found in the actual data stream. - const dict = `<?xml version="1.0"?>` + `<book>` + `<data>` + `<meta name="` + `" content="` - - // The data to compress should (but is not required to) contain frequent - // substrings that match those in the dictionary. - const data = `<?xml version="1.0"?> -<book> - <meta name="title" content="The Go Programming Language"/> - <meta name="authors" content="Alan Donovan and Brian Kernighan"/> - <meta name="published" content="2015-10-26"/> - <meta name="isbn" content="978-0134190440"/> - <data>...</data> -</book> -` - - var b bytes.Buffer - - // Compress the data using the specially crafted dictionary. 
- zw, err := flate.NewWriterDict(&b, flate.BestCompression, []byte(dict)) - if err != nil { - log.Fatal(err) - } - if _, err := io.Copy(zw, strings.NewReader(data)); err != nil { - log.Fatal(err) - } - if err := zw.Close(); err != nil { - log.Fatal(err) - } - - // The decompressor must use the same dictionary as the compressor. - // Otherwise, the input may appear as corrupted. - fmt.Println("Decompressed output using the dictionary:") - zr := flate.NewReaderDict(bytes.NewReader(b.Bytes()), []byte(dict)) - if _, err := io.Copy(os.Stdout, zr); err != nil { - log.Fatal(err) - } - if err := zr.Close(); err != nil { - log.Fatal(err) - } - - fmt.Println() - - // Substitute all of the bytes in the dictionary with a '#' to visually - // demonstrate the approximate effectiveness of using a preset dictionary. - fmt.Println("Substrings matched by the dictionary are marked with #:") - hashDict := []byte(dict) - for i := range hashDict { - hashDict[i] = '#' - } - zr = flate.NewReaderDict(&b, hashDict) - if _, err := io.Copy(os.Stdout, zr); err != nil { - log.Fatal(err) - } - if err := zr.Close(); err != nil { - log.Fatal(err) - } - - // Output: - // Decompressed output using the dictionary: - // <?xml version="1.0"?> - // <book> - // <meta name="title" content="The Go Programming Language"/> - // <meta name="authors" content="Alan Donovan and Brian Kernighan"/> - // <meta name="published" content="2015-10-26"/> - // <meta name="isbn" content="978-0134190440"/> - // <data>...</data> - // </book> - // - // Substrings matched by the dictionary are marked with #: - // ##################### - // ###### - // ############title###########The Go Programming Language"/# - // ############authors###########Alan Donovan and Brian Kernighan"/# - // ############published###########2015-10-26"/# - // ############isbn###########978-0134190440"/# - // ######...</##### - // </##### -} - -// DEFLATE is suitable for transmitting compressed data across the network. 
-func Example_synchronization() { - var wg sync.WaitGroup - defer wg.Wait() - - // Use io.Pipe to simulate a network connection. - // A real network application should take care to properly close the - // underlying connection. - rp, wp := io.Pipe() - - // Start a goroutine to act as the transmitter. - wg.Go(func() { - defer wp.Close() - - zw, err := flate.NewWriter(wp, flate.BestSpeed) - if err != nil { - log.Fatal(err) - } - - b := make([]byte, 256) - for m := range strings.FieldsSeq("A long time ago in a galaxy far, far away...") { - // We use a simple framing format where the first byte is the - // message length, followed the message itself. - b[0] = uint8(copy(b[1:], m)) - - if _, err := zw.Write(b[:1+len(m)]); err != nil { - log.Fatal(err) - } - - // Flush ensures that the receiver can read all data sent so far. - if err := zw.Flush(); err != nil { - log.Fatal(err) - } - } - - if err := zw.Close(); err != nil { - log.Fatal(err) - } - }) - - // Start a goroutine to act as the receiver. - wg.Go(func() { - zr := flate.NewReader(rp) - - b := make([]byte, 256) - for { - // Read the message length. - // This is guaranteed to return for every corresponding - // Flush and Close on the transmitter side. - if _, err := io.ReadFull(zr, b[:1]); err != nil { - if err == io.EOF { - break // The transmitter closed the stream - } - log.Fatal(err) - } - - // Read the message content. - n := int(b[0]) - if _, err := io.ReadFull(zr, b[:n]); err != nil { - log.Fatal(err) - } - - fmt.Printf("Received %d bytes: %s\n", n, b[:n]) - } - fmt.Println() - - if err := zr.Close(); err != nil { - log.Fatal(err) - } - }) - - // Output: - // Received 1 bytes: A - // Received 4 bytes: long - // Received 4 bytes: time - // Received 3 bytes: ago - // Received 2 bytes: in - // Received 1 bytes: a - // Received 6 bytes: galaxy - // Received 4 bytes: far, - // Received 3 bytes: far - // Received 7 bytes: away... 
-} diff --git a/internal/compress/flate/fast_encoder.go b/internal/compress/flate/fast_encoder.go deleted file mode 100644 index 7af14349..00000000 --- a/internal/compress/flate/fast_encoder.go +++ /dev/null @@ -1,189 +0,0 @@ -// Copyright 2011 The Snappy-Go Authors. All rights reserved. -// Modified for deflate by Klaus Post (c) 2015. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package flate - -import ( - "fmt" - - "lindenii.org/go/furgit/internal/compress/internal/le" -) - -type fastEnc interface { - Encode(dst *tokens, src []byte) - Reset() -} - -func newFastEnc(level int) fastEnc { - switch level { - case 1: - return &fastEncL1{fastGen: fastGen{cur: maxStoreBlockSize}} - case 2: - return &fastEncL2{fastGen: fastGen{cur: maxStoreBlockSize}} - case 3: - return &fastEncL3{fastGen: fastGen{cur: maxStoreBlockSize}} - case 4: - return &fastEncL4{fastGen: fastGen{cur: maxStoreBlockSize}} - case 5: - return &fastEncL5{fastGen: fastGen{cur: maxStoreBlockSize}} - case 6: - return &fastEncL6{fastGen: fastGen{cur: maxStoreBlockSize}} - default: - panic("invalid level specified") - } -} - -const ( - tableBits = 15 // Bits used in the table - tableSize = 1 << tableBits // Size of the table - tableShift = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32. - baseMatchOffset = 1 // The smallest match offset - baseMatchLength = 3 // The smallest match length per the RFC section 3.2.5 - maxMatchOffset = 1 << 15 // The largest match offset - - bTableBits = 17 // Bits used in the big tables - bTableSize = 1 << bTableBits // Size of the table - allocHistory = maxStoreBlockSize * 5 // Size to preallocate for history. - bufferReset = (1 << 31) - allocHistory - maxStoreBlockSize - 1 // Reset the buffer offset when reaching this. 
-) - -const ( - prime3bytes = 506832829 - prime4bytes = 2654435761 - prime5bytes = 889523592379 - prime6bytes = 227718039650203 - prime7bytes = 58295818150454627 - prime8bytes = 0xcf1bbcdcb7a56463 -) - -func load3232(b []byte, i int32) uint32 { - return le.Load32(b, i) -} - -func load6432(b []byte, i int32) uint64 { - return le.Load64(b, i) -} - -type tableEntry struct { - offset int32 -} - -// fastGen maintains the table for matches, -// and the previous byte block for level 2. -// This is the generic implementation. -type fastGen struct { - hist []byte - cur int32 -} - -func (e *fastGen) addBlock(src []byte) int32 { - // check if we have space already - if len(e.hist)+len(src) > cap(e.hist) { - if cap(e.hist) == 0 { - e.hist = make([]byte, 0, allocHistory) - } else { - if cap(e.hist) < maxMatchOffset*2 { - panic("unexpected buffer size") - } - // Move down - offset := int32(len(e.hist)) - maxMatchOffset - // copy(e.hist[0:maxMatchOffset], e.hist[offset:]) - *(*[maxMatchOffset]byte)(e.hist) = *(*[maxMatchOffset]byte)(e.hist[offset:]) - e.cur += offset - e.hist = e.hist[:maxMatchOffset] - } - } - s := int32(len(e.hist)) - e.hist = append(e.hist, src...) - return s -} - -type tableEntryPrev struct { - Cur tableEntry - Prev tableEntry -} - -// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits. -// Preferably h should be a constant and should always be <64. -func hash7(u uint64, h uint8) uint32 { - return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & reg8SizeMask64)) -} - -// hashLen returns a hash of the lowest mls bytes of with length output bits. -// mls must be >=3 and <=8. Any other value will return hash for 4 bytes. -// length should always be < 32. -// Preferably length and mls should be a constant for inlining. 
-func hashLen(u uint64, length, mls uint8) uint32 { - switch mls { - case 3: - return (uint32(u<<8) * prime3bytes) >> (32 - length) - case 5: - return uint32(((u << (64 - 40)) * prime5bytes) >> (64 - length)) - case 6: - return uint32(((u << (64 - 48)) * prime6bytes) >> (64 - length)) - case 7: - return uint32(((u << (64 - 56)) * prime7bytes) >> (64 - length)) - case 8: - return uint32((u * prime8bytes) >> (64 - length)) - default: - return (uint32(u) * prime4bytes) >> (32 - length) - } -} - -// matchlen will return the match length between offsets and t in src. -// The maximum length returned is maxMatchLength - 4. -// It is assumed that s > t, that t >=0 and s < len(src). -func (e *fastGen) matchlen(s, t int, src []byte) int32 { - if debugDeflate { - if t >= s { - panic(fmt.Sprint("t >=s:", t, s)) - } - if int(s) >= len(src) { - panic(fmt.Sprint("s >= len(src):", s, len(src))) - } - if t < 0 { - panic(fmt.Sprint("t < 0:", t)) - } - if s-t > maxMatchOffset { - panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")")) - } - } - a := src[s:min(s+maxMatchLength-4, len(src))] - b := src[t:] - return int32(matchLen(a, b)) -} - -// matchlenLong will return the match length between offsets and t in src. -// It is assumed that s > t, that t >=0 and s < len(src). -func (e *fastGen) matchlenLong(s, t int, src []byte) int32 { - if debugDeflate { - if t >= s { - panic(fmt.Sprint("t >=s:", t, s)) - } - if int(s) >= len(src) { - panic(fmt.Sprint("s >= len(src):", s, len(src))) - } - if t < 0 { - panic(fmt.Sprint("t < 0:", t)) - } - if s-t > maxMatchOffset { - panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")")) - } - } - return int32(matchLen(src[s:], src[t:])) -} - -// Reset the encoding table. -func (e *fastGen) Reset() { - if cap(e.hist) < allocHistory { - e.hist = make([]byte, 0, allocHistory) - } - // We offset current position so everything will be out of reach. 
- // If we are above the buffer reset it will be cleared anyway since len(hist) == 0. - if e.cur <= bufferReset { - e.cur += maxMatchOffset + int32(len(e.hist)) - } - e.hist = e.hist[:0] -} diff --git a/internal/compress/flate/flate_test.go b/internal/compress/flate/flate_test.go deleted file mode 100644 index 7b019548..00000000 --- a/internal/compress/flate/flate_test.go +++ /dev/null @@ -1,370 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// This test tests some internals of the flate package. -// The tests in package compress/gzip serve as the -// end-to-end test of the decompressor. - -package flate - -import ( - "archive/zip" - "bytes" - "compress/flate" - "encoding/hex" - "fmt" - "io" - "os" - "testing" -) - -// The following test should not panic. -func TestIssue5915(t *testing.T) { - bits := []int{ - 4, 0, 0, 6, 4, 3, 2, 3, 3, 4, 4, 5, 0, 0, 0, 0, 5, 5, 6, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 6, 0, 11, 0, 8, 0, 6, 6, 10, 8, - } - var h huffmanDecoder - if h.init(bits) { - t.Fatalf("Given sequence of bits is bad, and should not succeed.") - } -} - -// The following test should not panic. -func TestIssue5962(t *testing.T) { - bits := []int{ - 4, 0, 0, 6, 4, 3, 2, 3, 3, 4, 4, 5, 0, 0, 0, 0, - 5, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, - } - var h huffmanDecoder - if h.init(bits) { - t.Fatalf("Given sequence of bits is bad, and should not succeed.") - } -} - -// The following test should not panic. 
-func TestIssue6255(t *testing.T) { - bits1 := []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11} - bits2 := []int{11, 13} - var h huffmanDecoder - if !h.init(bits1) { - t.Fatalf("Given sequence of bits is good and should succeed.") - } - if h.init(bits2) { - t.Fatalf("Given sequence of bits is bad and should not succeed.") - } -} - -func TestInvalidEncoding(t *testing.T) { - // Initialize Huffman decoder to recognize "0". - var h huffmanDecoder - if !h.init([]int{1}) { - t.Fatal("Failed to initialize Huffman decoder") - } - - // Initialize decompressor with invalid Huffman coding. - var f decompressor - f.r = bytes.NewReader([]byte{0xff}) - - _, err := f.huffSym(&h) - if err == nil { - t.Fatal("Should have rejected invalid bit sequence") - } -} - -func TestRegressions(t *testing.T) { - // Test fuzzer regressions - data, err := os.ReadFile("testdata/regression.zip") - if err != nil { - t.Fatal(err) - } - zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data))) - if err != nil { - t.Fatal(err) - } - for _, tt := range zr.File { - data, err := tt.Open() - if err != nil { - t.Fatal(err) - } - data1, err := io.ReadAll(data) - if err != nil { - t.Fatal(err) - } - t.Run(tt.Name, func(t *testing.T) { - if testing.Short() && len(data1) > 10000 { - t.SkipNow() - } - for level := 0; level <= 9; level++ { - t.Run(fmt.Sprint(tt.Name+"-level", 1), func(t *testing.T) { - buf := new(bytes.Buffer) - fw, err := NewWriter(buf, level) - if err != nil { - t.Error(err) - } - n, err := fw.Write(data1) - if n != len(data1) { - t.Error("short write") - } - if err != nil { - t.Error(err) - } - err = fw.Close() - if err != nil { - t.Error(err) - } - fr1 := NewReader(buf) - data2, err := io.ReadAll(fr1) - if err != nil { - t.Error(err) - } - if !bytes.Equal(data1, data2) { - t.Error("not equal") - } - // Do it again... 
- buf.Reset() - fw.Reset(buf) - n, err = fw.Write(data1) - if n != len(data1) { - t.Error("short write") - } - if err != nil { - t.Error(err) - } - err = fw.Close() - if err != nil { - t.Error(err) - } - fr1 = flate.NewReader(buf) - data2, err = io.ReadAll(fr1) - if err != nil { - t.Error(err) - } - if !bytes.Equal(data1, data2) { - t.Error("not equal") - } - }) - } - t.Run(tt.Name+"stateless", func(t *testing.T) { - // Split into two and use history... - buf := new(bytes.Buffer) - err = StatelessDeflate(buf, data1[:len(data1)/2], false, nil) - if err != nil { - t.Error(err) - } - - // Use top half as dictionary... - dict := data1[:len(data1)/2] - err = StatelessDeflate(buf, data1[len(data1)/2:], true, dict) - if err != nil { - t.Error(err) - } - t.Log(buf.Len()) - fr1 := NewReader(buf) - data2, err := io.ReadAll(fr1) - if err != nil { - t.Error(err) - } - if !bytes.Equal(data1, data2) { - // fmt.Printf("want:%x\ngot: %x\n", data1, data2) - t.Error("not equal") - } - }) - }) - } -} - -func TestInvalidBits(t *testing.T) { - oversubscribed := []int{1, 2, 3, 4, 4, 5} - incomplete := []int{1, 2, 4, 4} - var h huffmanDecoder - if h.init(oversubscribed) { - t.Fatal("Should reject oversubscribed bit-length set") - } - if h.init(incomplete) { - t.Fatal("Should reject incomplete bit-length set") - } -} - -func TestStreams(t *testing.T) { - // To verify any of these hexstrings as valid or invalid flate streams - // according to the C zlib library, you can use the Python wrapper library: - // >>> hex_string = "010100feff11" - // >>> import zlib - // >>> zlib.decompress(hex_string.decode("hex"), -15) # Negative means raw DEFLATE - // '\x11' - - testCases := []struct { - desc string // Description of the stream - stream string // Hexstring of the input DEFLATE stream - want string // Expected result. 
Use "fail" to expect failure - }{{ - "degenerate HCLenTree", - "05e0010000000000100000000000000000000000000000000000000000000000" + - "00000000000000000004", - "fail", - }, { - "complete HCLenTree, empty HLitTree, empty HDistTree", - "05e0010400000000000000000000000000000000000000000000000000000000" + - "00000000000000000010", - "fail", - }, { - "empty HCLenTree", - "05e0010000000000000000000000000000000000000000000000000000000000" + - "00000000000000000010", - "fail", - }, { - "complete HCLenTree, complete HLitTree, empty HDistTree, use missing HDist symbol", - "000100feff000de0010400000000100000000000000000000000000000000000" + - "0000000000000000000000000000002c", - "fail", - }, { - "complete HCLenTree, complete HLitTree, degenerate HDistTree, use missing HDist symbol", - "000100feff000de0010000000000000000000000000000000000000000000000" + - "00000000000000000610000000004070", - "fail", - }, { - "complete HCLenTree, empty HLitTree, empty HDistTree", - "05e0010400000000100400000000000000000000000000000000000000000000" + - "0000000000000000000000000008", - "fail", - }, { - "complete HCLenTree, empty HLitTree, degenerate HDistTree", - "05e0010400000000100400000000000000000000000000000000000000000000" + - "0000000000000000000800000008", - "fail", - }, { - "complete HCLenTree, degenerate HLitTree, degenerate HDistTree, use missing HLit symbol", - "05e0010400000000100000000000000000000000000000000000000000000000" + - "0000000000000000001c", - "fail", - }, { - "complete HCLenTree, complete HLitTree, too large HDistTree", - "edff870500000000200400000000000000000000000000000000000000000000" + - "000000000000000000080000000000000004", - "fail", - }, { - "complete HCLenTree, complete HLitTree, empty HDistTree, excessive repeater code", - "edfd870500000000200400000000000000000000000000000000000000000000" + - "000000000000000000e8b100", - "fail", - }, { - "complete HCLenTree, complete HLitTree, empty HDistTree of normal length 30", - 
"05fd01240000000000f8ffffffffffffffffffffffffffffffffffffffffffff" + - "ffffffffffffffffff07000000fe01", - "", - }, { - "complete HCLenTree, complete HLitTree, empty HDistTree of excessive length 31", - "05fe01240000000000f8ffffffffffffffffffffffffffffffffffffffffffff" + - "ffffffffffffffffff07000000fc03", - "fail", - }, { - "complete HCLenTree, over-subscribed HLitTree, empty HDistTree", - "05e001240000000000fcffffffffffffffffffffffffffffffffffffffffffff" + - "ffffffffffffffffff07f00f", - "fail", - }, { - "complete HCLenTree, under-subscribed HLitTree, empty HDistTree", - "05e001240000000000fcffffffffffffffffffffffffffffffffffffffffffff" + - "fffffffffcffffffff07f00f", - "fail", - }, { - "complete HCLenTree, complete HLitTree with single code, empty HDistTree", - "05e001240000000000f8ffffffffffffffffffffffffffffffffffffffffffff" + - "ffffffffffffffffff07f00f", - "01", - }, { - "complete HCLenTree, complete HLitTree with multiple codes, empty HDistTree", - "05e301240000000000f8ffffffffffffffffffffffffffffffffffffffffffff" + - "ffffffffffffffffff07807f", - "01", - }, { - "complete HCLenTree, complete HLitTree, degenerate HDistTree, use valid HDist symbol", - "000100feff000de0010400000000100000000000000000000000000000000000" + - "0000000000000000000000000000003c", - "00000000", - }, { - "complete HCLenTree, degenerate HLitTree, degenerate HDistTree", - "05e0010400000000100000000000000000000000000000000000000000000000" + - "0000000000000000000c", - "", - }, { - "complete HCLenTree, degenerate HLitTree, empty HDistTree", - "05e0010400000000100000000000000000000000000000000000000000000000" + - "00000000000000000004", - "", - }, { - "complete HCLenTree, complete HLitTree, empty HDistTree, spanning repeater code", - "edfd870500000000200400000000000000000000000000000000000000000000" + - "000000000000000000e8b000", - "", - }, { - "complete HCLenTree with length codes, complete HLitTree, empty HDistTree", - "ede0010400000000100000000000000000000000000000000000000000000000" + 
- "0000000000000000000400004000", - "", - }, { - "complete HCLenTree, complete HLitTree, degenerate HDistTree, use valid HLit symbol 284 with count 31", - "000100feff00ede0010400000000100000000000000000000000000000000000" + - "000000000000000000000000000000040000407f00", - "0000000000000000000000000000000000000000000000000000000000000000" + - "0000000000000000000000000000000000000000000000000000000000000000" + - "0000000000000000000000000000000000000000000000000000000000000000" + - "0000000000000000000000000000000000000000000000000000000000000000" + - "0000000000000000000000000000000000000000000000000000000000000000" + - "0000000000000000000000000000000000000000000000000000000000000000" + - "0000000000000000000000000000000000000000000000000000000000000000" + - "0000000000000000000000000000000000000000000000000000000000000000" + - "000000", - }, { - "complete HCLenTree, complete HLitTree, degenerate HDistTree, use valid HLit and HDist symbols", - "0cc2010d00000082b0ac4aff0eb07d27060000ffff", - "616263616263", - }, { - "fixed block, use reserved symbol 287", - "33180700", - "fail", - }, { - "raw block", - "010100feff11", - "11", - }, { - "issue 10426 - over-subscribed HCLenTree causes a hang", - "344c4a4e494d4b070000ff2e2eff2e2e2e2e2eff", - "fail", - }, { - "issue 11030 - empty HDistTree unexpectedly leads to error", - "05c0070600000080400fff37a0ca", - "", - }, { - "issue 11033 - empty HDistTree unexpectedly leads to error", - "050fb109c020cca5d017dcbca044881ee1034ec149c8980bbc413c2ab35be9dc" + - "b1473449922449922411202306ee97b0383a521b4ffdcf3217f9f7d3adb701", - "3130303634342068652e706870005d05355f7ed957ff084a90925d19e3ebc6d0" + - "c6d7", - }} - - for i, tc := range testCases { - data, err := hex.DecodeString(tc.stream) - if err != nil { - t.Fatal(err) - } - data, err = io.ReadAll(NewReader(bytes.NewReader(data))) - if tc.want == "fail" { - if err == nil { - t.Errorf("#%d (%s): got nil error, want non-nil", i, tc.desc) - } - } else { - if err != nil { - 
t.Errorf("#%d (%s): %v", i, tc.desc, err) - continue - } - if got := hex.EncodeToString(data); got != tc.want { - t.Errorf("#%d (%s):\ngot %q\nwant %q", i, tc.desc, got, tc.want) - } - - } - } -} diff --git a/internal/compress/flate/fuzz_test.go b/internal/compress/flate/fuzz_test.go deleted file mode 100644 index b3d0098f..00000000 --- a/internal/compress/flate/fuzz_test.go +++ /dev/null @@ -1,176 +0,0 @@ -//go:build go1.18 - -package flate - -import ( - "bytes" - "flag" - "io" - "os" - "strconv" - "testing" - - "lindenii.org/go/furgit/internal/compress/internal/fuzz" -) - -// Fuzzing tweaks: -var ( - fuzzStartF = flag.Int("start", HuffmanOnly, "Start fuzzing at this level") - fuzzEndF = flag.Int("end", BestCompression, "End fuzzing at this level (inclusive)") - fuzzMaxF = flag.Int("max", 1<<20, "Maximum input size") - fuzzSLF = flag.Bool("sl", true, "Include stateless encodes") - fuzzWindow = flag.Bool("windows", true, "Include windowed encodes") -) - -func TestMain(m *testing.M) { - flag.Parse() - os.Exit(m.Run()) -} - -func FuzzEncoding(f *testing.F) { - fuzz.AddFromZip(f, "testdata/regression.zip", fuzz.TypeRaw, false) - fuzz.AddFromZip(f, "testdata/fuzz/encode-raw-corpus.zip", fuzz.TypeRaw, testing.Short()) - fuzz.AddFromZip(f, "testdata/fuzz/FuzzEncoding.zip", fuzz.TypeGoFuzz, testing.Short()) - - startFuzz := *fuzzStartF - endFuzz := *fuzzEndF - maxSize := *fuzzMaxF - stateless := *fuzzSLF - fuzzWindow := *fuzzWindow - - decoder := NewReader(nil) - buf := new(bytes.Buffer) - encs := make([]*Writer, endFuzz-startFuzz+1) - for i := range encs { - var err error - encs[i], err = NewWriter(nil, i+startFuzz) - if err != nil { - f.Fatal(err.Error()) - } - } - - f.Fuzz(func(t *testing.T, data []byte) { - if len(data) > maxSize { - return - } - for level := startFuzz; level <= endFuzz; level++ { - msg := "level " + strconv.Itoa(level) + ":" - buf.Reset() - fw := encs[level-startFuzz] - fw.Reset(buf) - n, err := fw.Write(data) - if n != len(data) { - t.Fatal(msg + 
"short write") - } - if err != nil { - t.Fatal(msg + err.Error()) - } - err = fw.Close() - if err != nil { - t.Fatal(msg + err.Error()) - } - decoder.(Resetter).Reset(buf, nil) - data2, err := io.ReadAll(decoder) - if err != nil { - t.Fatal(msg + err.Error()) - } - if !bytes.Equal(data, data2) { - t.Fatal(msg + "not equal") - } - // Do it again... (also uses copy) - msg = "level " + strconv.Itoa(level) + " (reset):" - buf.Reset() - fw.Reset(buf) - _, err = io.Copy(fw, bytes.NewReader(data)) - if err != nil { - t.Fatal(msg + err.Error()) - } - err = fw.Close() - if err != nil { - t.Fatal(msg + err.Error()) - } - decoder.(Resetter).Reset(buf, nil) - data2, err = io.ReadAll(decoder) - if err != nil { - t.Fatal(msg + err.Error()) - } - if !bytes.Equal(data, data2) { - t.Fatal(msg + "not equal") - } - } - if stateless { - // Split into two and use history... - msg := "stateless:" - buf.Reset() - err := StatelessDeflate(buf, data[:len(data)/2], false, nil) - if err != nil { - t.Error(err) - } - - // Use top half as dictionary... - dict := data[:len(data)/2] - err = StatelessDeflate(buf, data[len(data)/2:], true, dict) - if err != nil { - t.Error(err) - } - - decoder.(Resetter).Reset(buf, nil) - data2, err := io.ReadAll(decoder) - if err != nil { - t.Error(err) - } - if !bytes.Equal(data, data2) { - // fmt.Printf("want:%x\ngot: %x\n", data1, data2) - t.Error(msg + "not equal") - } - } - if fuzzWindow { - msg := "windowed:" - buf.Reset() - fw, err := NewWriterWindow(buf, 1000) - if err != nil { - t.Fatal(msg + err.Error()) - } - fw.Reset(buf) - n, err := fw.Write(data) - if n != len(data) { - t.Fatal(msg + "short write") - } - if err != nil { - t.Fatal(msg + err.Error()) - } - err = fw.Close() - if err != nil { - t.Fatal(msg + err.Error()) - } - decoder.(Resetter).Reset(buf, nil) - data2, err := io.ReadAll(decoder) - if err != nil { - t.Fatal(msg + err.Error()) - } - if !bytes.Equal(data, data2) { - t.Fatal(msg + "not equal") - } - // Do it again... 
- msg = msg + " (reset):" - buf.Reset() - fw.Reset(buf) - n, err = fw.Write(data) - if n != len(data) { - t.Fatal(msg + "short write") - } - if err != nil { - t.Fatal(msg + err.Error()) - } - err = fw.Close() - if err != nil { - t.Fatal(msg + err.Error()) - } - decoder.(Resetter).Reset(buf, nil) - data2, err = io.ReadAll(decoder) - if err != nil { - t.Fatal(msg + err.Error()) - } - } - }) -} diff --git a/internal/compress/flate/huffman_bit_writer.go b/internal/compress/flate/huffman_bit_writer.go deleted file mode 100644 index 18dff811..00000000 --- a/internal/compress/flate/huffman_bit_writer.go +++ /dev/null @@ -1,1174 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package flate - -import ( - "fmt" - "io" - "math" - - "lindenii.org/go/furgit/internal/compress/internal/le" -) - -const ( - // The largest offset code. - offsetCodeCount = 30 - - // The special code used to mark the end of a block. - endBlockMarker = 256 - - // The first length code. - lengthCodesStart = 257 - - // The number of codegen codes. - codegenCodeCount = 19 - badCode = 255 - - // maxPredefinedTokens is the maximum number of tokens - // where we check if fixed size is smaller. - maxPredefinedTokens = 250 - - // bufferFlushSize indicates the buffer size - // after which bytes are flushed to the writer. - // Should preferably be a multiple of 6, since - // we accumulate 6 bytes between writes to the buffer. - bufferFlushSize = 246 -) - -// Minimum length code that emits bits. -const lengthExtraBitsMinCode = 8 - -// The number of extra bits needed by length code X - LENGTH_CODES_START. -var lengthExtraBits = [32]uint8{ - /* 257 */ 0, 0, 0, - /* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, - /* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, - /* 280 */ 4, 5, 5, 5, 5, 0, -} - -// The length indicated by length code X - LENGTH_CODES_START. 
-var lengthBase = [32]uint8{ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, - 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, - 64, 80, 96, 112, 128, 160, 192, 224, 255, -} - -// Minimum offset code that emits bits. -const offsetExtraBitsMinCode = 4 - -// offset code word extra bits. -var offsetExtraBits = [32]int8{ - 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, - 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, - 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, - /* extended window */ - 14, 14, -} - -var offsetCombined = [32]uint32{} - -func init() { - offsetBase := [32]uint32{ - /* normal deflate */ - 0x000000, 0x000001, 0x000002, 0x000003, 0x000004, - 0x000006, 0x000008, 0x00000c, 0x000010, 0x000018, - 0x000020, 0x000030, 0x000040, 0x000060, 0x000080, - 0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300, - 0x000400, 0x000600, 0x000800, 0x000c00, 0x001000, - 0x001800, 0x002000, 0x003000, 0x004000, 0x006000, - - /* extended window */ - 0x008000, 0x00c000, - } - - for i := range offsetCombined[:] { - // Don't use extended window values... - if offsetExtraBits[i] == 0 || offsetBase[i] > 0x006000 { - continue - } - offsetCombined[i] = uint32(offsetExtraBits[i]) | (offsetBase[i] << 8) - } -} - -// The odd order in which the codegen code sizes are written. -var codegenOrder = []uint32{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} - -type huffmanBitWriter struct { - // writer is the underlying writer. - // Do not use it directly; use the write method, which ensures - // that Write errors are sticky. - writer io.Writer - - // Data waiting to be written is bytes[0:nbytes] - // and then the low nbits of bits. 
- bits uint64 - nbits uint8 - nbytes uint8 - lastHuffMan bool - literalEncoding *huffmanEncoder - tmpLitEncoding *huffmanEncoder - offsetEncoding *huffmanEncoder - codegenEncoding *huffmanEncoder - err error - lastHeader int - // Set between 0 (reused block can be up to 2x the size) - logNewTablePenalty uint - bytes [256 + 8]byte - literalFreq [lengthCodesStart + 32]uint16 - offsetFreq [32]uint16 - codegenFreq [codegenCodeCount]uint16 - - // codegen must have an extra space for the final symbol. - codegen [literalCount + offsetCodeCount + 1]uint8 -} - -// Huffman reuse. -// -// The huffmanBitWriter supports reusing huffman tables and thereby combining block sections. -// -// This is controlled by several variables: -// -// If lastHeader is non-zero the Huffman table can be reused. -// This also indicates that a Huffman table has been generated that can output all -// possible symbols. -// It also indicates that an EOB has not yet been emitted, so if a new tabel is generated -// an EOB with the previous table must be written. -// -// If lastHuffMan is set, a table for outputting literals has been generated and offsets are invalid. -// -// An incoming block estimates the output size of a new table using a 'fresh' by calculating the -// optimal size and adding a penalty in 'logNewTablePenalty'. -// A Huffman table is not optimal, which is why we add a penalty, and generating a new table -// is slower both for compression and decompression. 
- -func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter { - return &huffmanBitWriter{ - writer: w, - literalEncoding: newHuffmanEncoder(literalCount), - tmpLitEncoding: newHuffmanEncoder(literalCount), - codegenEncoding: newHuffmanEncoder(codegenCodeCount), - offsetEncoding: newHuffmanEncoder(offsetCodeCount), - } -} - -func (w *huffmanBitWriter) reset(writer io.Writer) { - w.writer = writer - w.bits, w.nbits, w.nbytes, w.err = 0, 0, 0, nil - w.lastHeader = 0 - w.lastHuffMan = false -} - -func (w *huffmanBitWriter) canReuse(t *tokens) (ok bool) { - a := t.offHist[:offsetCodeCount] - b := w.offsetEncoding.codes - b = b[:len(a)] - for i, v := range a { - if v != 0 && b[i].zero() { - return false - } - } - - a = t.extraHist[:literalCount-256] - b = w.literalEncoding.codes[256:literalCount] - b = b[:len(a)] - for i, v := range a { - if v != 0 && b[i].zero() { - return false - } - } - - a = t.litHist[:256] - b = w.literalEncoding.codes[:len(a)] - for i, v := range a { - if v != 0 && b[i].zero() { - return false - } - } - return true -} - -func (w *huffmanBitWriter) flush() { - if w.err != nil { - w.nbits = 0 - return - } - if w.lastHeader > 0 { - // We owe an EOB - w.writeCode(w.literalEncoding.codes[endBlockMarker]) - w.lastHeader = 0 - } - n := w.nbytes - for w.nbits != 0 { - w.bytes[n] = byte(w.bits) - w.bits >>= 8 - if w.nbits > 8 { // Avoid underflow - w.nbits -= 8 - } else { - w.nbits = 0 - } - n++ - } - w.bits = 0 - if n > 0 { - w.write(w.bytes[:n]) - } - w.nbytes = 0 -} - -func (w *huffmanBitWriter) write(b []byte) { - if w.err != nil { - return - } - _, w.err = w.writer.Write(b) -} - -func (w *huffmanBitWriter) writeBits(b int32, nb uint8) { - w.bits |= uint64(b) << (w.nbits & 63) - w.nbits += nb - if w.nbits >= 48 { - w.writeOutBits() - } -} - -func (w *huffmanBitWriter) writeBytes(bytes []byte) { - if w.err != nil { - return - } - n := w.nbytes - if w.nbits&7 != 0 { - w.err = InternalError("writeBytes with unfinished bits") - return - } - for w.nbits != 0 
{ - w.bytes[n] = byte(w.bits) - w.bits >>= 8 - w.nbits -= 8 - n++ - } - if n != 0 { - w.write(w.bytes[:n]) - } - w.nbytes = 0 - w.write(bytes) -} - -// RFC 1951 3.2.7 specifies a special run-length encoding for specifying -// the literal and offset lengths arrays (which are concatenated into a single -// array). This method generates that run-length encoding. -// -// The result is written into the codegen array, and the frequencies -// of each code is written into the codegenFreq array. -// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional -// information. Code badCode is an end marker -// -// numLiterals The number of literals in literalEncoding -// numOffsets The number of offsets in offsetEncoding -// litenc, offenc The literal and offset encoder to use -func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litEnc, offEnc *huffmanEncoder) { - for i := range w.codegenFreq { - w.codegenFreq[i] = 0 - } - // Note that we are using codegen both as a temporary variable for holding - // a copy of the frequencies, and as the place where we put the result. - // This is fine because the output is always shorter than the input used - // so far. - codegen := w.codegen[:] // cache - // Copy the concatenated code sizes to codegen. Put a marker at the end. - cgnl := codegen[:numLiterals] - for i := range cgnl { - cgnl[i] = litEnc.codes[i].len() - } - - cgnl = codegen[numLiterals : numLiterals+numOffsets] - for i := range cgnl { - cgnl[i] = offEnc.codes[i].len() - } - codegen[numLiterals+numOffsets] = badCode - - size := codegen[0] - count := 1 - outIndex := 0 - for inIndex := 1; size != badCode; inIndex++ { - // INVARIANT: We have seen "count" copies of size that have not yet - // had output generated for them. - nextSize := codegen[inIndex] - if nextSize == size { - count++ - continue - } - // We need to generate codegen indicating "count" of size. 
- if size != 0 { - codegen[outIndex] = size - outIndex++ - w.codegenFreq[size]++ - count-- - for count >= 3 { - n := min(6, count) - codegen[outIndex] = 16 - outIndex++ - codegen[outIndex] = uint8(n - 3) - outIndex++ - w.codegenFreq[16]++ - count -= n - } - } else { - for count >= 11 { - n := min(138, count) - codegen[outIndex] = 18 - outIndex++ - codegen[outIndex] = uint8(n - 11) - outIndex++ - w.codegenFreq[18]++ - count -= n - } - if count >= 3 { - // count >= 3 && count <= 10 - codegen[outIndex] = 17 - outIndex++ - codegen[outIndex] = uint8(count - 3) - outIndex++ - w.codegenFreq[17]++ - count = 0 - } - } - count-- - for ; count >= 0; count-- { - codegen[outIndex] = size - outIndex++ - w.codegenFreq[size]++ - } - // Set up invariant for next time through the loop. - size = nextSize - count = 1 - } - // Marker indicating the end of the codegen. - codegen[outIndex] = badCode -} - -func (w *huffmanBitWriter) codegens() int { - numCodegens := len(w.codegenFreq) - for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 { - numCodegens-- - } - return numCodegens -} - -func (w *huffmanBitWriter) headerSize() (size, numCodegens int) { - numCodegens = len(w.codegenFreq) - for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 { - numCodegens-- - } - return 3 + 5 + 5 + 4 + (3 * numCodegens) + - w.codegenEncoding.bitLength(w.codegenFreq[:]) + - int(w.codegenFreq[16])*2 + - int(w.codegenFreq[17])*3 + - int(w.codegenFreq[18])*7, numCodegens -} - -// dynamicSize returns the size of dynamically encoded data in bits. -func (w *huffmanBitWriter) dynamicReuseSize(litEnc, offEnc *huffmanEncoder) (size int) { - size = litEnc.bitLength(w.literalFreq[:]) + - offEnc.bitLength(w.offsetFreq[:]) - return size -} - -// dynamicSize returns the size of dynamically encoded data in bits. 
-func (w *huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int) { - header, numCodegens := w.headerSize() - size = header + - litEnc.bitLength(w.literalFreq[:]) + - offEnc.bitLength(w.offsetFreq[:]) + - extraBits - return size, numCodegens -} - -// extraBitSize will return the number of bits that will be written -// as "extra" bits on matches. -func (w *huffmanBitWriter) extraBitSize() int { - total := 0 - for i, n := range w.literalFreq[257:literalCount] { - total += int(n) * int(lengthExtraBits[i&31]) - } - for i, n := range w.offsetFreq[:offsetCodeCount] { - total += int(n) * int(offsetExtraBits[i&31]) - } - return total -} - -// fixedSize returns the size of dynamically encoded data in bits. -func (w *huffmanBitWriter) fixedSize(extraBits int) int { - return 3 + - fixedLiteralEncoding.bitLength(w.literalFreq[:]) + - fixedOffsetEncoding.bitLength(w.offsetFreq[:]) + - extraBits -} - -// storedSize calculates the stored size, including header. -// The function returns the size in bits and whether the block -// fits inside a single block. -func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) { - if in == nil { - return 0, false - } - if len(in) <= maxStoreBlockSize { - return (len(in) + 5) * 8, true - } - return 0, false -} - -func (w *huffmanBitWriter) writeCode(c hcode) { - // The function does not get inlined if we "& 63" the shift. - w.bits |= c.code64() << (w.nbits & 63) - w.nbits += c.len() - if w.nbits >= 48 { - w.writeOutBits() - } -} - -// writeOutBits will write bits to the buffer. -func (w *huffmanBitWriter) writeOutBits() { - bits := w.bits - w.bits >>= 48 - w.nbits -= 48 - n := w.nbytes - - // We overwrite, but faster... - le.Store64(w.bytes[:], n, bits) - n += 6 - - if n >= bufferFlushSize { - if w.err != nil { - n = 0 - return - } - w.write(w.bytes[:n]) - n = 0 - } - - w.nbytes = n -} - -// Write the header of a dynamic Huffman block to the output stream. 
-// -// numLiterals The number of literals specified in codegen -// numOffsets The number of offsets specified in codegen -// numCodegens The number of codegens used in codegen -func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) { - if w.err != nil { - return - } - var firstBits int32 = 4 - if isEof { - firstBits = 5 - } - w.writeBits(firstBits, 3) - w.writeBits(int32(numLiterals-257), 5) - w.writeBits(int32(numOffsets-1), 5) - w.writeBits(int32(numCodegens-4), 4) - - for i := range numCodegens { - value := uint(w.codegenEncoding.codes[codegenOrder[i]].len()) - w.writeBits(int32(value), 3) - } - - i := 0 - for { - codeWord := uint32(w.codegen[i]) - i++ - if codeWord == badCode { - break - } - w.writeCode(w.codegenEncoding.codes[codeWord]) - - switch codeWord { - case 16: - w.writeBits(int32(w.codegen[i]), 2) - i++ - case 17: - w.writeBits(int32(w.codegen[i]), 3) - i++ - case 18: - w.writeBits(int32(w.codegen[i]), 7) - i++ - } - } -} - -// writeStoredHeader will write a stored header. -// If the stored block is only used for EOF, -// it is replaced with a fixed huffman block. -func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) { - if w.err != nil { - return - } - if w.lastHeader > 0 { - // We owe an EOB - w.writeCode(w.literalEncoding.codes[endBlockMarker]) - w.lastHeader = 0 - } - - // To write EOF, use a fixed encoding block. 10 bits instead of 5 bytes. 
- if length == 0 && isEof { - w.writeFixedHeader(isEof) - // EOB: 7 bits, value: 0 - w.writeBits(0, 7) - w.flush() - return - } - - var flag int32 - if isEof { - flag = 1 - } - w.writeBits(flag, 3) - w.flush() - w.writeBits(int32(length), 16) - w.writeBits(int32(^uint16(length)), 16) -} - -func (w *huffmanBitWriter) writeFixedHeader(isEof bool) { - if w.err != nil { - return - } - if w.lastHeader > 0 { - // We owe an EOB - w.writeCode(w.literalEncoding.codes[endBlockMarker]) - w.lastHeader = 0 - } - - // Indicate that we are a fixed Huffman block - var value int32 = 2 - if isEof { - value = 3 - } - w.writeBits(value, 3) -} - -// writeBlock will write a block of tokens with the smallest encoding. -// The original input can be supplied, and if the huffman encoded data -// is larger than the original bytes, the data will be written as a -// stored block. -// If the input is nil, the tokens will always be Huffman encoded. -func (w *huffmanBitWriter) writeBlock(tokens *tokens, eof bool, input []byte) { - if w.err != nil { - return - } - - tokens.AddEOB() - if w.lastHeader > 0 { - // We owe an EOB - w.writeCode(w.literalEncoding.codes[endBlockMarker]) - w.lastHeader = 0 - } - numLiterals, numOffsets := w.indexTokens(tokens, false) - w.generate() - var extraBits int - storedSize, storable := w.storedSize(input) - if storable { - extraBits = w.extraBitSize() - } - - // Figure out smallest code. - // Fixed Huffman baseline. - literalEncoding := fixedLiteralEncoding - offsetEncoding := fixedOffsetEncoding - size := math.MaxInt32 - if tokens.n < maxPredefinedTokens { - size = w.fixedSize(extraBits) - } - - // Dynamic Huffman? - var numCodegens int - - // Generate codegen and codegenFrequencies, which indicates how to encode - // the literalEncoding and the offsetEncoding. 
- w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding) - w.codegenEncoding.generate(w.codegenFreq[:], 7) - dynamicSize, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits) - - if dynamicSize < size { - size = dynamicSize - literalEncoding = w.literalEncoding - offsetEncoding = w.offsetEncoding - } - - // Stored bytes? - if storable && storedSize <= size { - w.writeStoredHeader(len(input), eof) - w.writeBytes(input) - return - } - - // Huffman. - if literalEncoding == fixedLiteralEncoding { - w.writeFixedHeader(eof) - } else { - w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) - } - - // Write the tokens. - w.writeTokens(tokens.Slice(), literalEncoding.codes, offsetEncoding.codes) -} - -// writeBlockDynamic encodes a block using a dynamic Huffman table. -// This should be used if the symbols used have a disproportionate -// histogram distribution. -// If input is supplied and the compression savings are below 1/16th of the -// input size the block is stored. -func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []byte, sync bool) { - if w.err != nil { - return - } - - sync = sync || eof - if sync { - tokens.AddEOB() - } - - // We cannot reuse pure huffman table, and must mark as EOF. - if (w.lastHuffMan || eof) && w.lastHeader > 0 { - // We will not try to reuse. - w.writeCode(w.literalEncoding.codes[endBlockMarker]) - w.lastHeader = 0 - w.lastHuffMan = false - } - - // fillReuse enables filling of empty values. - // This will make encodings always reusable without testing. - // However, this does not appear to benefit on most cases. - const fillReuse = false - - // Check if we can reuse... 
- if !fillReuse && w.lastHeader > 0 && !w.canReuse(tokens) { - w.writeCode(w.literalEncoding.codes[endBlockMarker]) - w.lastHeader = 0 - } - - numLiterals, numOffsets := w.indexTokens(tokens, true) - extraBits := 0 - ssize, storable := w.storedSize(input) - - const usePrefs = true - if storable || w.lastHeader > 0 { - extraBits = w.extraBitSize() - } - - var size int - - // Check if we should reuse. - if w.lastHeader > 0 { - // Estimate size for using a new table. - // Use the previous header size as the best estimate. - newSize := w.lastHeader + tokens.EstimatedBits() - newSize += int(w.literalEncoding.codes[endBlockMarker].len()) + newSize>>w.logNewTablePenalty - - // The estimated size is calculated as an optimal table. - // We add a penalty to make it more realistic and re-use a bit more. - reuseSize := w.dynamicReuseSize(w.literalEncoding, w.offsetEncoding) + extraBits - - // Check if a new table is better. - if newSize < reuseSize { - // Write the EOB we owe. - w.writeCode(w.literalEncoding.codes[endBlockMarker]) - size = newSize - w.lastHeader = 0 - } else { - size = reuseSize - } - - if tokens.n < maxPredefinedTokens { - if preSize := w.fixedSize(extraBits) + 7; usePrefs && preSize < size { - // Check if we get a reasonable size decrease. - if storable && ssize <= size { - w.writeStoredHeader(len(input), eof) - w.writeBytes(input) - return - } - w.writeFixedHeader(eof) - if !sync { - tokens.AddEOB() - } - w.writeTokens(tokens.Slice(), fixedLiteralEncoding.codes, fixedOffsetEncoding.codes) - return - } - } - // Check if we get a reasonable size decrease. 
- if storable && ssize <= size { - w.writeStoredHeader(len(input), eof) - w.writeBytes(input) - return - } - } - - // We want a new block/table - if w.lastHeader == 0 { - if fillReuse && !sync { - w.fillTokens() - numLiterals, numOffsets = maxNumLit, maxNumDist - } else { - w.literalFreq[endBlockMarker] = 1 - } - - w.generate() - // Generate codegen and codegenFrequencies, which indicates how to encode - // the literalEncoding and the offsetEncoding. - w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding) - w.codegenEncoding.generate(w.codegenFreq[:], 7) - - var numCodegens int - if fillReuse && !sync { - // Reindex for accurate size... - w.indexTokens(tokens, true) - } - size, numCodegens = w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits) - - // Store predefined, if we don't get a reasonable improvement. - if tokens.n < maxPredefinedTokens { - if preSize := w.fixedSize(extraBits); usePrefs && preSize <= size { - // Store bytes, if we don't get an improvement. - if storable && ssize <= preSize { - w.writeStoredHeader(len(input), eof) - w.writeBytes(input) - return - } - w.writeFixedHeader(eof) - if !sync { - tokens.AddEOB() - } - w.writeTokens(tokens.Slice(), fixedLiteralEncoding.codes, fixedOffsetEncoding.codes) - return - } - } - - if storable && ssize <= size { - // Store bytes, if we don't get an improvement. - w.writeStoredHeader(len(input), eof) - w.writeBytes(input) - return - } - - // Write Huffman table. - w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) - if !sync { - w.lastHeader, _ = w.headerSize() - } - w.lastHuffMan = false - } - - if sync { - w.lastHeader = 0 - } - // Write the tokens. 
- w.writeTokens(tokens.Slice(), w.literalEncoding.codes, w.offsetEncoding.codes) -} - -func (w *huffmanBitWriter) fillTokens() { - for i, v := range w.literalFreq[:literalCount] { - if v == 0 { - w.literalFreq[i] = 1 - } - } - for i, v := range w.offsetFreq[:offsetCodeCount] { - if v == 0 { - w.offsetFreq[i] = 1 - } - } -} - -// indexTokens indexes a slice of tokens, and updates -// literalFreq and offsetFreq, and generates literalEncoding -// and offsetEncoding. -// The number of literal and offset tokens is returned. -func (w *huffmanBitWriter) indexTokens(t *tokens, alwaysEOB bool) (numLiterals, numOffsets int) { - // copy(w.literalFreq[:], t.litHist[:]) - *(*[256]uint16)(w.literalFreq[:]) = t.litHist - // copy(w.literalFreq[256:], t.extraHist[:]) - *(*[32]uint16)(w.literalFreq[256:]) = t.extraHist - w.offsetFreq = t.offHist - - if t.n == 0 { - return - } - if alwaysEOB { - w.literalFreq[endBlockMarker] = 1 - } - - // get the number of literals - numLiterals = len(w.literalFreq) - for w.literalFreq[numLiterals-1] == 0 { - numLiterals-- - } - // get the number of offsets - numOffsets = len(w.offsetFreq) - for numOffsets > 0 && w.offsetFreq[numOffsets-1] == 0 { - numOffsets-- - } - if numOffsets == 0 { - // We haven't found a single match. If we want to go with the dynamic encoding, - // we should count at least one offset to be sure that the offset huffman tree could be encoded. - w.offsetFreq[0] = 1 - numOffsets = 1 - } - return -} - -func (w *huffmanBitWriter) generate() { - w.literalEncoding.generate(w.literalFreq[:literalCount], 15) - w.offsetEncoding.generate(w.offsetFreq[:offsetCodeCount], 15) -} - -// writeTokens writes a slice of tokens to the output. -// codes for literal and offset encoding must be supplied. -func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) { - if w.err != nil { - return - } - if len(tokens) == 0 { - return - } - - // Only last token should be endBlockMarker. 
- var deferEOB bool - if tokens[len(tokens)-1] == endBlockMarker { - tokens = tokens[:len(tokens)-1] - deferEOB = true - } - - // Create slices up to the next power of two to avoid bounds checks. - lits := leCodes[:256] - offs := oeCodes[:32] - lengths := leCodes[lengthCodesStart:] - lengths = lengths[:32] - - // Go 1.16 LOVES having these on stack. - bits, nbits, nbytes := w.bits, w.nbits, w.nbytes - - for _, t := range tokens { - if t < 256 { - // w.writeCode(lits[t.literal()]) - c := lits[t] - bits |= c.code64() << (nbits & 63) - nbits += c.len() - if nbits >= 48 { - le.Store64(w.bytes[:], nbytes, bits) - bits >>= 48 - nbits -= 48 - nbytes += 6 - if nbytes >= bufferFlushSize { - if w.err != nil { - nbytes = 0 - return - } - _, w.err = w.writer.Write(w.bytes[:nbytes]) - nbytes = 0 - } - } - continue - } - - // Write the length - length := t.length() - lengthCode := lengthCode(length) & 31 - if false { - w.writeCode(lengths[lengthCode]) - } else { - // inlined - c := lengths[lengthCode] - bits |= c.code64() << (nbits & 63) - nbits += c.len() - if nbits >= 48 { - le.Store64(w.bytes[:], nbytes, bits) - bits >>= 48 - nbits -= 48 - nbytes += 6 - if nbytes >= bufferFlushSize { - if w.err != nil { - nbytes = 0 - return - } - _, w.err = w.writer.Write(w.bytes[:nbytes]) - nbytes = 0 - } - } - } - - if lengthCode >= lengthExtraBitsMinCode { - extraLengthBits := lengthExtraBits[lengthCode] - // w.writeBits(extraLength, extraLengthBits) - extraLength := int32(length - lengthBase[lengthCode]) - bits |= uint64(extraLength) << (nbits & 63) - nbits += extraLengthBits - if nbits >= 48 { - le.Store64(w.bytes[:], nbytes, bits) - bits >>= 48 - nbits -= 48 - nbytes += 6 - if nbytes >= bufferFlushSize { - if w.err != nil { - nbytes = 0 - return - } - _, w.err = w.writer.Write(w.bytes[:nbytes]) - nbytes = 0 - } - } - } - // Write the offset - offset := t.offset() - offsetCode := (offset >> 16) & 31 - if false { - w.writeCode(offs[offsetCode]) - } else { - // inlined - c := 
offs[offsetCode] - bits |= c.code64() << (nbits & 63) - nbits += c.len() - if nbits >= 48 { - le.Store64(w.bytes[:], nbytes, bits) - bits >>= 48 - nbits -= 48 - nbytes += 6 - if nbytes >= bufferFlushSize { - if w.err != nil { - nbytes = 0 - return - } - _, w.err = w.writer.Write(w.bytes[:nbytes]) - nbytes = 0 - } - } - } - - if offsetCode >= offsetExtraBitsMinCode { - offsetComb := offsetCombined[offsetCode] - // w.writeBits(extraOffset, extraOffsetBits) - bits |= uint64((offset-(offsetComb>>8))&matchOffsetOnlyMask) << (nbits & 63) - nbits += uint8(offsetComb) - if nbits >= 48 { - le.Store64(w.bytes[:], nbytes, bits) - bits >>= 48 - nbits -= 48 - nbytes += 6 - if nbytes >= bufferFlushSize { - if w.err != nil { - nbytes = 0 - return - } - _, w.err = w.writer.Write(w.bytes[:nbytes]) - nbytes = 0 - } - } - } - } - // Restore... - w.bits, w.nbits, w.nbytes = bits, nbits, nbytes - - if deferEOB { - w.writeCode(leCodes[endBlockMarker]) - } -} - -// huffOffset is a static offset encoder used for huffman only encoding. -// It can be reused since we will not be encoding offset values. -var huffOffset *huffmanEncoder - -func init() { - w := newHuffmanBitWriter(nil) - w.offsetFreq[0] = 1 - huffOffset = newHuffmanEncoder(offsetCodeCount) - huffOffset.generate(w.offsetFreq[:offsetCodeCount], 15) -} - -// writeBlockHuff encodes a block of bytes as either -// Huffman encoded literals or uncompressed bytes if the -// results only gains very little from compression. -func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) { - if w.err != nil { - return - } - - // Clear histogram - for i := range w.literalFreq[:] { - w.literalFreq[i] = 0 - } - if !w.lastHuffMan { - for i := range w.offsetFreq[:] { - w.offsetFreq[i] = 0 - } - } - - const numLiterals = endBlockMarker + 1 - const numOffsets = 1 - - // Add everything as literals - // We have to estimate the header size. 
- // Assume header is around 70 bytes: - // https://stackoverflow.com/a/25454430 - const guessHeaderSizeBits = 70 * 8 - histogram(input, w.literalFreq[:numLiterals]) - ssize, storable := w.storedSize(input) - if storable && len(input) > 1024 { - // Quick check for incompressible content. - abs := float64(0) - avg := float64(len(input)) / 256 - max := float64(len(input) * 2) - for _, v := range w.literalFreq[:256] { - diff := float64(v) - avg - abs += diff * diff - if abs > max { - break - } - } - if abs < max { - if debugDeflate { - fmt.Println("stored", abs, "<", max) - } - // No chance we can compress this... - w.writeStoredHeader(len(input), eof) - w.writeBytes(input) - return - } - } - w.literalFreq[endBlockMarker] = 1 - w.tmpLitEncoding.generate(w.literalFreq[:numLiterals], 15) - estBits := w.tmpLitEncoding.canReuseBits(w.literalFreq[:numLiterals]) - if estBits < math.MaxInt32 { - estBits += w.lastHeader - if w.lastHeader == 0 { - estBits += guessHeaderSizeBits - } - estBits += estBits >> w.logNewTablePenalty - } - - // Store bytes, if we don't get a reasonable improvement. - if storable && ssize <= estBits { - if debugDeflate { - fmt.Println("stored,", ssize, "<=", estBits) - } - w.writeStoredHeader(len(input), eof) - w.writeBytes(input) - return - } - - if w.lastHeader > 0 { - reuseSize := w.literalEncoding.canReuseBits(w.literalFreq[:256]) - - if estBits < reuseSize { - if debugDeflate { - fmt.Println("NOT reusing, reuse:", reuseSize/8, "> new:", estBits/8, "header est:", w.lastHeader/8, "bytes") - } - // We owe an EOB - w.writeCode(w.literalEncoding.codes[endBlockMarker]) - w.lastHeader = 0 - } else if debugDeflate { - fmt.Println("reusing, reuse:", reuseSize/8, "> new:", estBits/8, "- header est:", w.lastHeader/8) - } - } - - count := 0 - if w.lastHeader == 0 { - // Use the temp encoding, so swap. 
- w.literalEncoding, w.tmpLitEncoding = w.tmpLitEncoding, w.literalEncoding - // Generate codegen and codegenFrequencies, which indicates how to encode - // the literalEncoding and the offsetEncoding. - w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset) - w.codegenEncoding.generate(w.codegenFreq[:], 7) - numCodegens := w.codegens() - - // Huffman. - w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) - w.lastHuffMan = true - w.lastHeader, _ = w.headerSize() - if debugDeflate { - count += w.lastHeader - fmt.Println("header:", count/8) - } - } - - encoding := w.literalEncoding.codes[:256] - // Go 1.16 LOVES having these on stack. At least 1.5x the speed. - bits, nbits, nbytes := w.bits, w.nbits, w.nbytes - - if debugDeflate { - count -= int(nbytes)*8 + int(nbits) - } - // Unroll, write 3 codes/loop. - // Fastest number of unrolls. - for len(input) > 3 { - // We must have at least 48 bits free. - if nbits >= 8 { - n := nbits >> 3 - le.Store64(w.bytes[:], nbytes, bits) - bits >>= (n * 8) & 63 - nbits -= n * 8 - nbytes += n - } - if nbytes >= bufferFlushSize { - if w.err != nil { - nbytes = 0 - return - } - if debugDeflate { - count += int(nbytes) * 8 - } - _, w.err = w.writer.Write(w.bytes[:nbytes]) - nbytes = 0 - } - a, b := encoding[input[0]], encoding[input[1]] - bits |= a.code64() << (nbits & 63) - bits |= b.code64() << ((nbits + a.len()) & 63) - c := encoding[input[2]] - nbits += b.len() + a.len() - bits |= c.code64() << (nbits & 63) - nbits += c.len() - input = input[3:] - } - - // Remaining... 
- for _, t := range input { - if nbits >= 48 { - le.Store64(w.bytes[:], nbytes, bits) - bits >>= 48 - nbits -= 48 - nbytes += 6 - if nbytes >= bufferFlushSize { - if w.err != nil { - nbytes = 0 - return - } - if debugDeflate { - count += int(nbytes) * 8 - } - _, w.err = w.writer.Write(w.bytes[:nbytes]) - nbytes = 0 - } - } - // Bitwriting inlined, ~30% speedup - c := encoding[t] - bits |= c.code64() << (nbits & 63) - - nbits += c.len() - if debugDeflate { - count += int(c.len()) - } - } - // Restore... - w.bits, w.nbits, w.nbytes = bits, nbits, nbytes - - if debugDeflate { - nb := count + int(nbytes)*8 + int(nbits) - fmt.Println("wrote", nb, "bits,", nb/8, "bytes.") - } - // Flush if needed to have space. - if w.nbits >= 48 { - w.writeOutBits() - } - - if eof || sync { - w.writeCode(w.literalEncoding.codes[endBlockMarker]) - w.lastHeader = 0 - w.lastHuffMan = false - } -} diff --git a/internal/compress/flate/huffman_bit_writer_test.go b/internal/compress/flate/huffman_bit_writer_test.go deleted file mode 100644 index 3fc414e2..00000000 --- a/internal/compress/flate/huffman_bit_writer_test.go +++ /dev/null @@ -1,381 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package flate - -import ( - "bytes" - "flag" - "fmt" - "os" - "path/filepath" - "strings" - "testing" -) - -var update = flag.Bool("update", false, "update reference files") - -// TestBlockHuff tests huffman encoding against reference files -// to detect possible regressions. -// If encoding/bit allocation changes you can regenerate these files -// by using the -update flag. 
-func TestBlockHuff(t *testing.T) { - // determine input files - match, err := filepath.Glob("testdata/huffman-*.in") - if err != nil { - t.Fatal(err) - } - - for _, in := range match { - out := in // for files where input and output are identical - if strings.HasSuffix(in, ".in") { - out = in[:len(in)-len(".in")] + ".golden" - } - t.Run(in, func(t *testing.T) { - testBlockHuff(t, in, out) - }) - } -} - -func testBlockHuff(t *testing.T, in, out string) { - all, err := os.ReadFile(in) - if err != nil { - t.Error(err) - return - } - var buf bytes.Buffer - bw := newHuffmanBitWriter(&buf) - bw.logNewTablePenalty = 8 - bw.writeBlockHuff(false, all, false) - bw.flush() - got := buf.Bytes() - - want, err := os.ReadFile(out) - if err != nil && !*update { - t.Error(err) - return - } - - t.Logf("Testing %q", in) - if !bytes.Equal(got, want) { - if *update { - if in != out { - t.Logf("Updating %q", out) - if err := os.WriteFile(out, got, 0o666); err != nil { - t.Error(err) - } - return - } - // in == out: don't accidentally destroy input - t.Errorf("WARNING: -update did not rewrite input file %s", in) - } - - t.Errorf("%q != %q (see %q)", in, out, in+".got") - if err := os.WriteFile(in+".got", got, 0o666); err != nil { - t.Error(err) - } - return - } - t.Log("Output ok") - - // Test if the writer produces the same output after reset. - buf.Reset() - bw.reset(&buf) - bw.writeBlockHuff(false, all, false) - bw.flush() - got = buf.Bytes() - if !bytes.Equal(got, want) { - t.Errorf("after reset %q != %q (see %q)", in, out, in+".reset.got") - if err := os.WriteFile(in+".reset.got", got, 0o666); err != nil { - t.Error(err) - } - return - } - t.Log("Reset ok") - testWriterEOF(t, "huff", huffTest{input: in}, true) -} - -type huffTest struct { - tokens []token - input string // File name of input data matching the tokens. - want string // File name of data with the expected output with input available. - wantNoInput string // File name of the expected output when no input is available. 
-} - -const ml = 0x7fc00000 // Maximum length token. Used to reduce the size of writeBlockTests - -var writeBlockTests = []huffTest{ - { - input: "testdata/huffman-null-max.in", - want: "testdata/huffman-null-max.%s.expect", - wantNoInput: "testdata/huffman-null-max.%s.expect-noinput", - tokens: []token{0x0, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, 0x0, 0x0}, - }, - { - input: "testdata/huffman-pi.in", - want: "testdata/huffman-pi.%s.expect", - wantNoInput: "testdata/huffman-pi.%s.expect-noinput", - tokens: []token{0x33, 0x2e, 0x31, 0x34, 0x31, 0x35, 0x39, 0x32, 0x36, 0x35, 0x33, 0x35, 0x38, 0x39, 0x37, 0x39, 0x33, 0x32, 0x33, 0x38, 0x34, 0x36, 0x32, 0x36, 0x34, 0x33, 0x33, 0x38, 0x33, 0x32, 0x37, 0x39, 0x35, 0x30, 0x32, 0x38, 0x38, 0x34, 0x31, 0x39, 0x37, 0x31, 0x36, 0x39, 0x33, 0x39, 0x39, 0x33, 0x37, 0x35, 0x31, 0x30, 0x35, 0x38, 0x32, 0x30, 0x39, 0x37, 0x34, 0x39, 0x34, 0x34, 0x35, 0x39, 0x32, 0x33, 0x30, 0x37, 0x38, 0x31, 0x36, 0x34, 0x30, 0x36, 0x32, 0x38, 0x36, 0x32, 0x30, 0x38, 0x39, 0x39, 0x38, 0x36, 
0x32, 0x38, 0x30, 0x33, 0x34, 0x38, 0x32, 0x35, 0x33, 0x34, 0x32, 0x31, 0x31, 0x37, 0x30, 0x36, 0x37, 0x39, 0x38, 0x32, 0x31, 0x34, 0x38, 0x30, 0x38, 0x36, 0x35, 0x31, 0x33, 0x32, 0x38, 0x32, 0x33, 0x30, 0x36, 0x36, 0x34, 0x37, 0x30, 0x39, 0x33, 0x38, 0x34, 0x34, 0x36, 0x30, 0x39, 0x35, 0x35, 0x30, 0x35, 0x38, 0x32, 0x32, 0x33, 0x31, 0x37, 0x32, 0x35, 0x33, 0x35, 0x39, 0x34, 0x30, 0x38, 0x31, 0x32, 0x38, 0x34, 0x38, 0x31, 0x31, 0x31, 0x37, 0x34, 0x4040007e, 0x34, 0x31, 0x30, 0x32, 0x37, 0x30, 0x31, 0x39, 0x33, 0x38, 0x35, 0x32, 0x31, 0x31, 0x30, 0x35, 0x35, 0x35, 0x39, 0x36, 0x34, 0x34, 0x36, 0x32, 0x32, 0x39, 0x34, 0x38, 0x39, 0x35, 0x34, 0x39, 0x33, 0x30, 0x33, 0x38, 0x31, 0x40400012, 0x32, 0x38, 0x38, 0x31, 0x30, 0x39, 0x37, 0x35, 0x36, 0x36, 0x35, 0x39, 0x33, 0x33, 0x34, 0x34, 0x36, 0x40400047, 0x37, 0x35, 0x36, 0x34, 0x38, 0x32, 0x33, 0x33, 0x37, 0x38, 0x36, 0x37, 0x38, 0x33, 0x31, 0x36, 0x35, 0x32, 0x37, 0x31, 0x32, 0x30, 0x31, 0x39, 0x30, 0x39, 0x31, 0x34, 0x4040001a, 0x35, 0x36, 0x36, 0x39, 0x32, 0x33, 0x34, 0x36, 0x404000b2, 0x36, 0x31, 0x30, 0x34, 0x35, 0x34, 0x33, 0x32, 0x36, 0x40400032, 0x31, 0x33, 0x33, 0x39, 0x33, 0x36, 0x30, 0x37, 0x32, 0x36, 0x30, 0x32, 0x34, 0x39, 0x31, 0x34, 0x31, 0x32, 0x37, 0x33, 0x37, 0x32, 0x34, 0x35, 0x38, 0x37, 0x30, 0x30, 0x36, 0x36, 0x30, 0x36, 0x33, 0x31, 0x35, 0x35, 0x38, 0x38, 0x31, 0x37, 0x34, 0x38, 0x38, 0x31, 0x35, 0x32, 0x30, 0x39, 0x32, 0x30, 0x39, 0x36, 0x32, 0x38, 0x32, 0x39, 0x32, 0x35, 0x34, 0x30, 0x39, 0x31, 0x37, 0x31, 0x35, 0x33, 0x36, 0x34, 0x33, 0x36, 0x37, 0x38, 0x39, 0x32, 0x35, 0x39, 0x30, 0x33, 0x36, 0x30, 0x30, 0x31, 0x31, 0x33, 0x33, 0x30, 0x35, 0x33, 0x30, 0x35, 0x34, 0x38, 0x38, 0x32, 0x30, 0x34, 0x36, 0x36, 0x35, 0x32, 0x31, 0x33, 0x38, 0x34, 0x31, 0x34, 0x36, 0x39, 0x35, 0x31, 0x39, 0x34, 0x31, 0x35, 0x31, 0x31, 0x36, 0x30, 0x39, 0x34, 0x33, 0x33, 0x30, 0x35, 0x37, 0x32, 0x37, 0x30, 0x33, 0x36, 0x35, 0x37, 0x35, 0x39, 0x35, 0x39, 0x31, 0x39, 0x35, 0x33, 0x30, 0x39, 0x32, 0x31, 0x38, 0x36, 0x31, 
0x31, 0x37, 0x404000e9, 0x33, 0x32, 0x40400009, 0x39, 0x33, 0x31, 0x30, 0x35, 0x31, 0x31, 0x38, 0x35, 0x34, 0x38, 0x30, 0x37, 0x4040010e, 0x33, 0x37, 0x39, 0x39, 0x36, 0x32, 0x37, 0x34, 0x39, 0x35, 0x36, 0x37, 0x33, 0x35, 0x31, 0x38, 0x38, 0x35, 0x37, 0x35, 0x32, 0x37, 0x32, 0x34, 0x38, 0x39, 0x31, 0x32, 0x32, 0x37, 0x39, 0x33, 0x38, 0x31, 0x38, 0x33, 0x30, 0x31, 0x31, 0x39, 0x34, 0x39, 0x31, 0x32, 0x39, 0x38, 0x33, 0x33, 0x36, 0x37, 0x33, 0x33, 0x36, 0x32, 0x34, 0x34, 0x30, 0x36, 0x35, 0x36, 0x36, 0x34, 0x33, 0x30, 0x38, 0x36, 0x30, 0x32, 0x31, 0x33, 0x39, 0x34, 0x39, 0x34, 0x36, 0x33, 0x39, 0x35, 0x32, 0x32, 0x34, 0x37, 0x33, 0x37, 0x31, 0x39, 0x30, 0x37, 0x30, 0x32, 0x31, 0x37, 0x39, 0x38, 0x40800099, 0x37, 0x30, 0x32, 0x37, 0x37, 0x30, 0x35, 0x33, 0x39, 0x32, 0x31, 0x37, 0x31, 0x37, 0x36, 0x32, 0x39, 0x33, 0x31, 0x37, 0x36, 0x37, 0x35, 0x40800232, 0x37, 0x34, 0x38, 0x31, 0x40400006, 0x36, 0x36, 0x39, 0x34, 0x30, 0x404001e7, 0x30, 0x30, 0x30, 0x35, 0x36, 0x38, 0x31, 0x32, 0x37, 0x31, 0x34, 0x35, 0x32, 0x36, 0x33, 0x35, 0x36, 0x30, 0x38, 0x32, 0x37, 0x37, 0x38, 0x35, 0x37, 0x37, 0x31, 0x33, 0x34, 0x32, 0x37, 0x35, 0x37, 0x37, 0x38, 0x39, 0x36, 0x40400129, 0x33, 0x36, 0x33, 0x37, 0x31, 0x37, 0x38, 0x37, 0x32, 0x31, 0x34, 0x36, 0x38, 0x34, 0x34, 0x30, 0x39, 0x30, 0x31, 0x32, 0x32, 0x34, 0x39, 0x35, 0x33, 0x34, 0x33, 0x30, 0x31, 0x34, 0x36, 0x35, 0x34, 0x39, 0x35, 0x38, 0x35, 0x33, 0x37, 0x31, 0x30, 0x35, 0x30, 0x37, 0x39, 0x404000ca, 0x36, 0x40400153, 0x38, 0x39, 0x32, 0x33, 0x35, 0x34, 0x404001c9, 0x39, 0x35, 0x36, 0x31, 0x31, 0x32, 0x31, 0x32, 0x39, 0x30, 0x32, 0x31, 0x39, 0x36, 0x30, 0x38, 0x36, 0x34, 0x30, 0x33, 0x34, 0x34, 0x31, 0x38, 0x31, 0x35, 0x39, 0x38, 0x31, 0x33, 0x36, 0x32, 0x39, 0x37, 0x37, 0x34, 0x40400074, 0x30, 0x39, 0x39, 0x36, 0x30, 0x35, 0x31, 0x38, 0x37, 0x30, 0x37, 0x32, 0x31, 0x31, 0x33, 0x34, 0x39, 0x40800000, 0x38, 0x33, 0x37, 0x32, 0x39, 0x37, 0x38, 0x30, 0x34, 0x39, 0x39, 0x404002da, 0x39, 0x37, 0x33, 0x31, 0x37, 0x33, 0x32, 0x38, 
0x4040018a, 0x36, 0x33, 0x31, 0x38, 0x35, 0x40400301, 0x404002e8, 0x34, 0x35, 0x35, 0x33, 0x34, 0x36, 0x39, 0x30, 0x38, 0x33, 0x30, 0x32, 0x36, 0x34, 0x32, 0x35, 0x32, 0x32, 0x33, 0x30, 0x404002e3, 0x40400267, 0x38, 0x35, 0x30, 0x33, 0x35, 0x32, 0x36, 0x31, 0x39, 0x33, 0x31, 0x31, 0x40400212, 0x31, 0x30, 0x31, 0x30, 0x30, 0x30, 0x33, 0x31, 0x33, 0x37, 0x38, 0x33, 0x38, 0x37, 0x35, 0x32, 0x38, 0x38, 0x36, 0x35, 0x38, 0x37, 0x35, 0x33, 0x33, 0x32, 0x30, 0x38, 0x33, 0x38, 0x31, 0x34, 0x32, 0x30, 0x36, 0x40400140, 0x4040012b, 0x31, 0x34, 0x37, 0x33, 0x30, 0x33, 0x35, 0x39, 0x4080032e, 0x39, 0x30, 0x34, 0x32, 0x38, 0x37, 0x35, 0x35, 0x34, 0x36, 0x38, 0x37, 0x33, 0x31, 0x31, 0x35, 0x39, 0x35, 0x40400355, 0x33, 0x38, 0x38, 0x32, 0x33, 0x35, 0x33, 0x37, 0x38, 0x37, 0x35, 0x4080037f, 0x39, 0x4040013a, 0x31, 0x40400148, 0x38, 0x30, 0x35, 0x33, 0x4040018a, 0x32, 0x32, 0x36, 0x38, 0x30, 0x36, 0x36, 0x31, 0x33, 0x30, 0x30, 0x31, 0x39, 0x32, 0x37, 0x38, 0x37, 0x36, 0x36, 0x31, 0x31, 0x31, 0x39, 0x35, 0x39, 0x40400237, 0x36, 0x40800124, 0x38, 0x39, 0x33, 0x38, 0x30, 0x39, 0x35, 0x32, 0x35, 0x37, 0x32, 0x30, 0x31, 0x30, 0x36, 0x35, 0x34, 0x38, 0x35, 0x38, 0x36, 0x33, 0x32, 0x37, 0x4040009a, 0x39, 0x33, 0x36, 0x31, 0x35, 0x33, 0x40400220, 0x4080015c, 0x32, 0x33, 0x30, 0x33, 0x30, 0x31, 0x39, 0x35, 0x32, 0x30, 0x33, 0x35, 0x33, 0x30, 0x31, 0x38, 0x35, 0x32, 0x40400171, 0x40400075, 0x33, 0x36, 0x32, 0x32, 0x35, 0x39, 0x39, 0x34, 0x31, 0x33, 0x40400254, 0x34, 0x39, 0x37, 0x32, 0x31, 0x37, 0x404000de, 0x33, 0x34, 0x37, 0x39, 0x31, 0x33, 0x31, 0x35, 0x31, 0x35, 0x35, 0x37, 0x34, 0x38, 0x35, 0x37, 0x32, 0x34, 0x32, 0x34, 0x35, 0x34, 0x31, 0x35, 0x30, 0x36, 0x39, 0x4040013f, 0x38, 0x32, 0x39, 0x35, 0x33, 0x33, 0x31, 0x31, 0x36, 0x38, 0x36, 0x31, 0x37, 0x32, 0x37, 0x38, 0x40400337, 0x39, 0x30, 0x37, 0x35, 0x30, 0x39, 0x4040010d, 0x37, 0x35, 0x34, 0x36, 0x33, 0x37, 0x34, 0x36, 0x34, 0x39, 0x33, 0x39, 0x33, 0x31, 0x39, 0x32, 0x35, 0x35, 0x30, 0x36, 0x30, 0x34, 0x30, 0x30, 0x39, 0x4040026b, 
0x31, 0x36, 0x37, 0x31, 0x31, 0x33, 0x39, 0x30, 0x30, 0x39, 0x38, 0x40400335, 0x34, 0x30, 0x31, 0x32, 0x38, 0x35, 0x38, 0x33, 0x36, 0x31, 0x36, 0x30, 0x33, 0x35, 0x36, 0x33, 0x37, 0x30, 0x37, 0x36, 0x36, 0x30, 0x31, 0x30, 0x34, 0x40400172, 0x38, 0x31, 0x39, 0x34, 0x32, 0x39, 0x4080041e, 0x404000ef, 0x4040028b, 0x37, 0x38, 0x33, 0x37, 0x34, 0x404004a8, 0x38, 0x32, 0x35, 0x35, 0x33, 0x37, 0x40800209, 0x32, 0x36, 0x38, 0x4040002e, 0x34, 0x30, 0x34, 0x37, 0x404001d1, 0x34, 0x404004b5, 0x4040038d, 0x38, 0x34, 0x404003a8, 0x36, 0x40c0031f, 0x33, 0x33, 0x31, 0x33, 0x36, 0x37, 0x37, 0x30, 0x32, 0x38, 0x39, 0x38, 0x39, 0x31, 0x35, 0x32, 0x40400062, 0x35, 0x32, 0x31, 0x36, 0x32, 0x30, 0x35, 0x36, 0x39, 0x36, 0x40400411, 0x30, 0x35, 0x38, 0x40400477, 0x35, 0x40400498, 0x35, 0x31, 0x31, 0x40400209, 0x38, 0x32, 0x34, 0x33, 0x30, 0x30, 0x33, 0x35, 0x35, 0x38, 0x37, 0x36, 0x34, 0x30, 0x32, 0x34, 0x37, 0x34, 0x39, 0x36, 0x34, 0x37, 0x33, 0x32, 0x36, 0x33, 0x4040043e, 0x39, 0x39, 0x32, 0x4040044b, 0x34, 0x32, 0x36, 0x39, 0x40c002c5, 0x37, 0x404001d6, 0x34, 0x4040053d, 0x4040041d, 0x39, 0x33, 0x34, 0x31, 0x37, 0x404001ad, 0x31, 0x32, 0x4040002a, 0x34, 0x4040019e, 0x31, 0x35, 0x30, 0x33, 0x30, 0x32, 0x38, 0x36, 0x31, 0x38, 0x32, 0x39, 0x37, 0x34, 0x35, 0x35, 0x35, 0x37, 0x30, 0x36, 0x37, 0x34, 0x40400135, 0x35, 0x30, 0x35, 0x34, 0x39, 0x34, 0x35, 0x38, 0x404001c5, 0x39, 0x40400051, 0x35, 0x36, 0x404001ec, 0x37, 0x32, 0x31, 0x30, 0x37, 0x39, 0x40400159, 0x33, 0x30, 0x4040010a, 0x33, 0x32, 0x31, 0x31, 0x36, 0x35, 0x33, 0x34, 0x34, 0x39, 0x38, 0x37, 0x32, 0x30, 0x32, 0x37, 0x4040011b, 0x30, 0x32, 0x33, 0x36, 0x34, 0x4040022e, 0x35, 0x34, 0x39, 0x39, 0x31, 0x31, 0x39, 0x38, 0x40400418, 0x34, 0x4040011b, 0x35, 0x33, 0x35, 0x36, 0x36, 0x33, 0x36, 0x39, 0x40400450, 0x32, 0x36, 0x35, 0x404002e4, 0x37, 0x38, 0x36, 0x32, 0x35, 0x35, 0x31, 0x404003da, 0x31, 0x37, 0x35, 0x37, 0x34, 0x36, 0x37, 0x32, 0x38, 0x39, 0x30, 0x39, 0x37, 0x37, 0x37, 0x37, 0x40800453, 0x30, 0x30, 0x30, 0x404005fd, 0x37, 
0x30, 0x404004df, 0x36, 0x404003e9, 0x34, 0x39, 0x31, 0x4040041e, 0x40400297, 0x32, 0x31, 0x34, 0x37, 0x37, 0x32, 0x33, 0x35, 0x30, 0x31, 0x34, 0x31, 0x34, 0x40400643, 0x33, 0x35, 0x36, 0x404004af, 0x31, 0x36, 0x31, 0x33, 0x36, 0x31, 0x31, 0x35, 0x37, 0x33, 0x35, 0x32, 0x35, 0x40400504, 0x33, 0x34, 0x4040005b, 0x31, 0x38, 0x4040047b, 0x38, 0x34, 0x404005e7, 0x33, 0x33, 0x32, 0x33, 0x39, 0x30, 0x37, 0x33, 0x39, 0x34, 0x31, 0x34, 0x33, 0x33, 0x33, 0x34, 0x35, 0x34, 0x37, 0x37, 0x36, 0x32, 0x34, 0x40400242, 0x32, 0x35, 0x31, 0x38, 0x39, 0x38, 0x33, 0x35, 0x36, 0x39, 0x34, 0x38, 0x35, 0x35, 0x36, 0x32, 0x30, 0x39, 0x39, 0x32, 0x31, 0x39, 0x32, 0x32, 0x32, 0x31, 0x38, 0x34, 0x32, 0x37, 0x4040023e, 0x32, 0x404000ba, 0x36, 0x38, 0x38, 0x37, 0x36, 0x37, 0x31, 0x37, 0x39, 0x30, 0x40400055, 0x30, 0x40800106, 0x36, 0x36, 0x404003e7, 0x38, 0x38, 0x36, 0x32, 0x37, 0x32, 0x404006dc, 0x31, 0x37, 0x38, 0x36, 0x30, 0x38, 0x35, 0x37, 0x40400073, 0x33, 0x408002fc, 0x37, 0x39, 0x37, 0x36, 0x36, 0x38, 0x31, 0x404002bd, 0x30, 0x30, 0x39, 0x35, 0x33, 0x38, 0x38, 0x40400638, 0x33, 0x404006a5, 0x30, 0x36, 0x38, 0x30, 0x30, 0x36, 0x34, 0x32, 0x32, 0x35, 0x31, 0x32, 0x35, 0x32, 0x4040057b, 0x37, 0x33, 0x39, 0x32, 0x40400297, 0x40400474, 0x34, 0x408006b3, 0x38, 0x36, 0x32, 0x36, 0x39, 0x34, 0x35, 0x404001e5, 0x34, 0x31, 0x39, 0x36, 0x35, 0x32, 0x38, 0x35, 0x30, 0x40400099, 0x4040039c, 0x31, 0x38, 0x36, 0x33, 0x404001be, 0x34, 0x40800154, 0x32, 0x30, 0x33, 0x39, 0x4040058b, 0x34, 0x35, 0x404002bc, 0x32, 0x33, 0x37, 0x4040042c, 0x36, 0x40400510, 0x35, 0x36, 0x40400638, 0x37, 0x31, 0x39, 0x31, 0x37, 0x32, 0x38, 0x40400171, 0x37, 0x36, 0x34, 0x36, 0x35, 0x37, 0x35, 0x37, 0x33, 0x39, 0x40400101, 0x33, 0x38, 0x39, 0x40400748, 0x38, 0x33, 0x32, 0x36, 0x34, 0x35, 0x39, 0x39, 0x35, 0x38, 0x404006a7, 0x30, 0x34, 0x37, 0x38, 0x404001de, 0x40400328, 0x39, 0x4040002d, 0x36, 0x34, 0x30, 0x37, 0x38, 0x39, 0x35, 0x31, 0x4040008e, 0x36, 0x38, 0x33, 0x4040012f, 0x32, 0x35, 0x39, 0x35, 0x37, 0x30, 0x40400468, 
0x38, 0x32, 0x32, 0x404002c8, 0x32, 0x4040061b, 0x34, 0x30, 0x37, 0x37, 0x32, 0x36, 0x37, 0x31, 0x39, 0x34, 0x37, 0x38, 0x40400319, 0x38, 0x32, 0x36, 0x30, 0x31, 0x34, 0x37, 0x36, 0x39, 0x39, 0x30, 0x39, 0x404004e8, 0x30, 0x31, 0x33, 0x36, 0x33, 0x39, 0x34, 0x34, 0x33, 0x4040027f, 0x33, 0x30, 0x40400105, 0x32, 0x30, 0x33, 0x34, 0x39, 0x36, 0x32, 0x35, 0x32, 0x34, 0x35, 0x31, 0x37, 0x404003b5, 0x39, 0x36, 0x35, 0x31, 0x34, 0x33, 0x31, 0x34, 0x32, 0x39, 0x38, 0x30, 0x39, 0x31, 0x39, 0x30, 0x36, 0x35, 0x39, 0x32, 0x40400282, 0x37, 0x32, 0x32, 0x31, 0x36, 0x39, 0x36, 0x34, 0x36, 0x40400419, 0x4040007a, 0x35, 0x4040050e, 0x34, 0x40800565, 0x38, 0x40400559, 0x39, 0x37, 0x4040057b, 0x35, 0x34, 0x4040049d, 0x4040023e, 0x37, 0x4040065a, 0x38, 0x34, 0x36, 0x38, 0x31, 0x33, 0x4040008c, 0x36, 0x38, 0x33, 0x38, 0x36, 0x38, 0x39, 0x34, 0x32, 0x37, 0x37, 0x34, 0x31, 0x35, 0x35, 0x39, 0x39, 0x31, 0x38, 0x35, 0x4040005a, 0x32, 0x34, 0x35, 0x39, 0x35, 0x33, 0x39, 0x35, 0x39, 0x34, 0x33, 0x31, 0x404005b7, 0x37, 0x40400012, 0x36, 0x38, 0x30, 0x38, 0x34, 0x35, 0x404002e7, 0x37, 0x33, 0x4040081e, 0x39, 0x35, 0x38, 0x34, 0x38, 0x36, 0x35, 0x33, 0x38, 0x404006e8, 0x36, 0x32, 0x404000f2, 0x36, 0x30, 0x39, 0x404004b6, 0x36, 0x30, 0x38, 0x30, 0x35, 0x31, 0x32, 0x34, 0x33, 0x38, 0x38, 0x34, 0x4040013a, 0x4040000b, 0x34, 0x31, 0x33, 0x4040030f, 0x37, 0x36, 0x32, 0x37, 0x38, 0x40400341, 0x37, 0x31, 0x35, 0x4040059b, 0x33, 0x35, 0x39, 0x39, 0x37, 0x37, 0x30, 0x30, 0x31, 0x32, 0x39, 0x40400472, 0x38, 0x39, 0x34, 0x34, 0x31, 0x40400277, 0x36, 0x38, 0x35, 0x35, 0x4040005f, 0x34, 0x30, 0x36, 0x33, 0x404008e6, 0x32, 0x30, 0x37, 0x32, 0x32, 0x40400158, 0x40800203, 0x34, 0x38, 0x31, 0x35, 0x38, 0x40400205, 0x404001fe, 0x4040027a, 0x40400298, 0x33, 0x39, 0x34, 0x35, 0x32, 0x32, 0x36, 0x37, 0x40c00496, 0x38, 0x4040058a, 0x32, 0x31, 0x404002ea, 0x32, 0x40400387, 0x35, 0x34, 0x36, 0x36, 0x36, 0x4040051b, 0x32, 0x33, 0x39, 0x38, 0x36, 0x34, 0x35, 0x36, 0x404004c4, 0x31, 0x36, 0x33, 0x35, 0x40800253, 
0x40400811, 0x37, 0x404008ad, 0x39, 0x38, 0x4040045e, 0x39, 0x33, 0x36, 0x33, 0x34, 0x4040075b, 0x37, 0x34, 0x33, 0x32, 0x34, 0x4040047b, 0x31, 0x35, 0x30, 0x37, 0x36, 0x404004bb, 0x37, 0x39, 0x34, 0x35, 0x31, 0x30, 0x39, 0x4040003e, 0x30, 0x39, 0x34, 0x30, 0x404006a6, 0x38, 0x38, 0x37, 0x39, 0x37, 0x31, 0x30, 0x38, 0x39, 0x33, 0x404008f0, 0x36, 0x39, 0x31, 0x33, 0x36, 0x38, 0x36, 0x37, 0x32, 0x4040025b, 0x404001fe, 0x35, 0x4040053f, 0x40400468, 0x40400801, 0x31, 0x37, 0x39, 0x32, 0x38, 0x36, 0x38, 0x404008cc, 0x38, 0x37, 0x34, 0x37, 0x4080079e, 0x38, 0x32, 0x34, 0x4040097a, 0x38, 0x4040025b, 0x37, 0x31, 0x34, 0x39, 0x30, 0x39, 0x36, 0x37, 0x35, 0x39, 0x38, 0x404006ef, 0x33, 0x36, 0x35, 0x40400134, 0x38, 0x31, 0x4040005c, 0x40400745, 0x40400936, 0x36, 0x38, 0x32, 0x39, 0x4040057e, 0x38, 0x37, 0x32, 0x32, 0x36, 0x35, 0x38, 0x38, 0x30, 0x40400611, 0x35, 0x40400249, 0x34, 0x32, 0x37, 0x30, 0x34, 0x37, 0x37, 0x35, 0x35, 0x4040081e, 0x33, 0x37, 0x39, 0x36, 0x34, 0x31, 0x34, 0x35, 0x31, 0x35, 0x32, 0x404005fd, 0x32, 0x33, 0x34, 0x33, 0x36, 0x34, 0x35, 0x34, 0x404005de, 0x34, 0x34, 0x34, 0x37, 0x39, 0x35, 0x4040003c, 0x40400523, 0x408008e6, 0x34, 0x31, 0x4040052a, 0x33, 0x40400304, 0x35, 0x32, 0x33, 0x31, 0x40800841, 0x31, 0x36, 0x36, 0x31, 0x404008b2, 0x35, 0x39, 0x36, 0x39, 0x35, 0x33, 0x36, 0x32, 0x33, 0x31, 0x34, 0x404005ff, 0x32, 0x34, 0x38, 0x34, 0x39, 0x33, 0x37, 0x31, 0x38, 0x37, 0x31, 0x31, 0x30, 0x31, 0x34, 0x35, 0x37, 0x36, 0x35, 0x34, 0x40400761, 0x30, 0x32, 0x37, 0x39, 0x39, 0x33, 0x34, 0x34, 0x30, 0x33, 0x37, 0x34, 0x32, 0x30, 0x30, 0x37, 0x4040093f, 0x37, 0x38, 0x35, 0x33, 0x39, 0x30, 0x36, 0x32, 0x31, 0x39, 0x40800299, 0x40400345, 0x38, 0x34, 0x37, 0x408003d2, 0x38, 0x33, 0x33, 0x32, 0x31, 0x34, 0x34, 0x35, 0x37, 0x31, 0x40400284, 0x40400776, 0x34, 0x33, 0x35, 0x30, 0x40400928, 0x40400468, 0x35, 0x33, 0x31, 0x39, 0x31, 0x30, 0x34, 0x38, 0x34, 0x38, 0x31, 0x30, 0x30, 0x35, 0x33, 0x37, 0x30, 0x36, 0x404008bc, 0x4080059d, 0x40800781, 0x31, 0x40400559, 0x37, 
0x4040031b, 0x35, 0x404007ec, 0x4040040c, 0x36, 0x33, 0x408007dc, 0x34, 0x40400971, 0x4080034e, 0x408003f5, 0x38, 0x4080052d, 0x40800887, 0x39, 0x40400187, 0x39, 0x31, 0x404008ce, 0x38, 0x31, 0x34, 0x36, 0x37, 0x35, 0x31, 0x4040062b, 0x31, 0x32, 0x33, 0x39, 0x40c001a9, 0x39, 0x30, 0x37, 0x31, 0x38, 0x36, 0x34, 0x39, 0x34, 0x32, 0x33, 0x31, 0x39, 0x36, 0x31, 0x35, 0x36, 0x404001ec, 0x404006bc, 0x39, 0x35, 0x40400926, 0x40400469, 0x4040011b, 0x36, 0x30, 0x33, 0x38, 0x40400a25, 0x4040016f, 0x40400384, 0x36, 0x32, 0x4040045a, 0x35, 0x4040084c, 0x36, 0x33, 0x38, 0x39, 0x33, 0x37, 0x37, 0x38, 0x37, 0x404008c5, 0x404000f8, 0x39, 0x37, 0x39, 0x32, 0x30, 0x37, 0x37, 0x33, 0x404005d7, 0x32, 0x31, 0x38, 0x32, 0x35, 0x36, 0x404007df, 0x36, 0x36, 0x404006d6, 0x34, 0x32, 0x4080067e, 0x36, 0x404006e6, 0x34, 0x34, 0x40400024, 0x35, 0x34, 0x39, 0x32, 0x30, 0x32, 0x36, 0x30, 0x35, 0x40400ab3, 0x408003e4, 0x32, 0x30, 0x31, 0x34, 0x39, 0x404004d2, 0x38, 0x35, 0x30, 0x37, 0x33, 0x40400599, 0x36, 0x36, 0x36, 0x30, 0x40400194, 0x32, 0x34, 0x33, 0x34, 0x30, 0x40400087, 0x30, 0x4040076b, 0x38, 0x36, 0x33, 0x40400956, 0x404007e4, 0x4040042b, 0x40400174, 0x35, 0x37, 0x39, 0x36, 0x32, 0x36, 0x38, 0x35, 0x36, 0x40400140, 0x35, 0x30, 0x38, 0x40400523, 0x35, 0x38, 0x37, 0x39, 0x36, 0x39, 0x39, 0x40400711, 0x35, 0x37, 0x34, 0x40400a18, 0x38, 0x34, 0x30, 0x404008b3, 0x31, 0x34, 0x35, 0x39, 0x31, 0x4040078c, 0x37, 0x30, 0x40400234, 0x30, 0x31, 0x40400be7, 0x31, 0x32, 0x40400c74, 0x30, 0x404003c3, 0x33, 0x39, 0x40400b2a, 0x40400112, 0x37, 0x31, 0x35, 0x404003b0, 0x34, 0x32, 0x30, 0x40800bf2, 0x39, 0x40400bc2, 0x30, 0x37, 0x40400341, 0x40400795, 0x40400aaf, 0x40400c62, 0x32, 0x31, 0x40400960, 0x32, 0x35, 0x31, 0x4040057b, 0x40400944, 0x39, 0x32, 0x404001b2, 0x38, 0x32, 0x36, 0x40400b66, 0x32, 0x40400278, 0x33, 0x32, 0x31, 0x35, 0x37, 0x39, 0x31, 0x39, 0x38, 0x34, 0x31, 0x34, 0x4080087b, 0x39, 0x31, 0x36, 0x34, 0x408006e8, 0x39, 0x40800b58, 0x404008db, 0x37, 0x32, 0x32, 0x40400321, 0x35, 0x404008a4, 
0x40400141, 0x39, 0x31, 0x30, 0x404000bc, 0x40400c5b, 0x35, 0x32, 0x38, 0x30, 0x31, 0x37, 0x40400231, 0x37, 0x31, 0x32, 0x40400914, 0x38, 0x33, 0x32, 0x40400373, 0x31, 0x40400589, 0x30, 0x39, 0x33, 0x35, 0x33, 0x39, 0x36, 0x35, 0x37, 0x4040064b, 0x31, 0x30, 0x38, 0x33, 0x40400069, 0x35, 0x31, 0x4040077a, 0x40400d5a, 0x31, 0x34, 0x34, 0x34, 0x32, 0x31, 0x30, 0x30, 0x40400202, 0x30, 0x33, 0x4040019c, 0x31, 0x31, 0x30, 0x33, 0x40400c81, 0x40400009, 0x40400026, 0x40c00602, 0x35, 0x31, 0x36, 0x404005d9, 0x40800883, 0x4040092a, 0x35, 0x40800c42, 0x38, 0x35, 0x31, 0x37, 0x31, 0x34, 0x33, 0x37, 0x40400605, 0x4040006d, 0x31, 0x35, 0x35, 0x36, 0x35, 0x30, 0x38, 0x38, 0x404003b9, 0x39, 0x38, 0x39, 0x38, 0x35, 0x39, 0x39, 0x38, 0x32, 0x33, 0x38, 0x404001cf, 0x404009ba, 0x33, 0x4040016c, 0x4040043e, 0x404009c3, 0x38, 0x40800e05, 0x33, 0x32, 0x40400107, 0x35, 0x40400305, 0x33, 0x404001ca, 0x39, 0x4040041b, 0x39, 0x38, 0x4040087d, 0x34, 0x40400cb8, 0x37, 0x4040064b, 0x30, 0x37, 0x404000e5, 0x34, 0x38, 0x31, 0x34, 0x31, 0x40400539, 0x38, 0x35, 0x39, 0x34, 0x36, 0x31, 0x40400bc9, 0x38, 0x30}, - }, - { - input: "testdata/huffman-rand-1k.in", - want: "testdata/huffman-rand-1k.%s.expect", - wantNoInput: "testdata/huffman-rand-1k.%s.expect-noinput", - tokens: []token{0xf8, 0x8b, 0x96, 0x76, 0x48, 0xd, 0x85, 0x94, 0x25, 0x80, 0xaf, 0xc2, 0xfe, 0x8d, 0xe8, 0x20, 0xeb, 0x17, 0x86, 0xc9, 0xb7, 0xc5, 0xde, 0x6, 0xea, 0x7d, 0x18, 0x8b, 0xe7, 0x3e, 0x7, 0xda, 0xdf, 0xff, 0x6c, 0x73, 0xde, 0xcc, 0xe7, 0x6d, 0x8d, 0x4, 0x19, 0x49, 0x7f, 0x47, 0x1f, 0x48, 0x15, 0xb0, 0xe8, 0x9e, 0xf2, 0x31, 0x59, 0xde, 0x34, 0xb4, 0x5b, 0xe5, 0xe0, 0x9, 0x11, 0x30, 0xc2, 0x88, 0x5b, 0x7c, 0x5d, 0x14, 0x13, 0x6f, 0x23, 0xa9, 0xd, 0xbc, 0x2d, 0x23, 0xbe, 0xd9, 0xed, 0x75, 0x4, 0x6c, 0x99, 0xdf, 0xfd, 0x70, 0x66, 0xe6, 0xee, 0xd9, 0xb1, 0x9e, 0x6e, 0x83, 0x59, 0xd5, 0xd4, 0x80, 0x59, 0x98, 0x77, 0x89, 0x43, 0x38, 0xc9, 0xaf, 0x30, 0x32, 0x9a, 0x20, 0x1b, 0x46, 0x3d, 0x67, 0x6e, 0xd7, 0x72, 0x9e, 0x4e, 0x21, 0x4f, 
0xc6, 0xe0, 0xd4, 0x7b, 0x4, 0x8d, 0xa5, 0x3, 0xf6, 0x5, 0x9b, 0x6b, 0xdc, 0x2a, 0x93, 0x77, 0x28, 0xfd, 0xb4, 0x62, 0xda, 0x20, 0xe7, 0x1f, 0xab, 0x6b, 0x51, 0x43, 0x39, 0x2f, 0xa0, 0x92, 0x1, 0x6c, 0x75, 0x3e, 0xf4, 0x35, 0xfd, 0x43, 0x2e, 0xf7, 0xa4, 0x75, 0xda, 0xea, 0x9b, 0xa, 0x64, 0xb, 0xe0, 0x23, 0x29, 0xbd, 0xf7, 0xe7, 0x83, 0x3c, 0xfb, 0xdf, 0xb3, 0xae, 0x4f, 0xa4, 0x47, 0x55, 0x99, 0xde, 0x2f, 0x96, 0x6e, 0x1c, 0x43, 0x4c, 0x87, 0xe2, 0x7c, 0xd9, 0x5f, 0x4c, 0x7c, 0xe8, 0x90, 0x3, 0xdb, 0x30, 0x95, 0xd6, 0x22, 0xc, 0x47, 0xb8, 0x4d, 0x6b, 0xbd, 0x24, 0x11, 0xab, 0x2c, 0xd7, 0xbe, 0x6e, 0x7a, 0xd6, 0x8, 0xa3, 0x98, 0xd8, 0xdd, 0x15, 0x6a, 0xfa, 0x93, 0x30, 0x1, 0x25, 0x1d, 0xa2, 0x74, 0x86, 0x4b, 0x6a, 0x95, 0xe8, 0xe1, 0x4e, 0xe, 0x76, 0xb9, 0x49, 0xa9, 0x5f, 0xa0, 0xa6, 0x63, 0x3c, 0x7e, 0x7e, 0x20, 0x13, 0x4f, 0xbb, 0x66, 0x92, 0xb8, 0x2e, 0xa4, 0xfa, 0x48, 0xcb, 0xae, 0xb9, 0x3c, 0xaf, 0xd3, 0x1f, 0xe1, 0xd5, 0x8d, 0x42, 0x6d, 0xf0, 0xfc, 0x8c, 0xc, 0x0, 0xde, 0x40, 0xab, 0x8b, 0x47, 0x97, 0x4e, 0xa8, 0xcf, 0x8e, 0xdb, 0xa6, 0x8b, 0x20, 0x9, 0x84, 0x7a, 0x66, 0xe5, 0x98, 0x29, 0x2, 0x95, 0xe6, 0x38, 0x32, 0x60, 0x3, 0xe3, 0x9a, 0x1e, 0x54, 0xe8, 0x63, 0x80, 0x48, 0x9c, 0xe7, 0x63, 0x33, 0x6e, 0xa0, 0x65, 0x83, 0xfa, 0xc6, 0xba, 0x7a, 0x43, 0x71, 0x5, 0xf5, 0x68, 0x69, 0x85, 0x9c, 0xba, 0x45, 0xcd, 0x6b, 0xb, 0x19, 0xd1, 0xbb, 0x7f, 0x70, 0x85, 0x92, 0xd1, 0xb4, 0x64, 0x82, 0xb1, 0xe4, 0x62, 0xc5, 0x3c, 0x46, 0x1f, 0x92, 0x31, 0x1c, 0x4e, 0x41, 0x77, 0xf7, 0xe7, 0x87, 0xa2, 0xf, 0x6e, 0xe8, 0x92, 0x3, 0x6b, 0xa, 0xe7, 0xa9, 0x3b, 0x11, 0xda, 0x66, 0x8a, 0x29, 0xda, 0x79, 0xe1, 0x64, 0x8d, 0xe3, 0x54, 0xd4, 0xf5, 0xef, 0x64, 0x87, 0x3b, 0xf4, 0xc2, 0xf4, 0x71, 0x13, 0xa9, 0xe9, 0xe0, 0xa2, 0x6, 0x14, 0xab, 0x5d, 0xa7, 0x96, 0x0, 0xd6, 0xc3, 0xcc, 0x57, 0xed, 0x39, 0x6a, 0x25, 0xcd, 0x76, 0xea, 0xba, 0x3a, 0xf2, 0xa1, 0x95, 0x5d, 0xe5, 0x71, 0xcf, 0x9c, 0x62, 0x9e, 0x6a, 0xfa, 0xd5, 0x31, 0xd1, 0xa8, 0x66, 0x30, 0x33, 0xaa, 0x51, 0x17, 0x13, 0x82, 0x99, 
0xc8, 0x14, 0x60, 0x9f, 0x4d, 0x32, 0x6d, 0xda, 0x19, 0x26, 0x21, 0xdc, 0x7e, 0x2e, 0x25, 0x67, 0x72, 0xca, 0xf, 0x92, 0xcd, 0xf6, 0xd6, 0xcb, 0x97, 0x8a, 0x33, 0x58, 0x73, 0x70, 0x91, 0x1d, 0xbf, 0x28, 0x23, 0xa3, 0xc, 0xf1, 0x83, 0xc3, 0xc8, 0x56, 0x77, 0x68, 0xe3, 0x82, 0xba, 0xb9, 0x57, 0x56, 0x57, 0x9c, 0xc3, 0xd6, 0x14, 0x5, 0x3c, 0xb1, 0xaf, 0x93, 0xc8, 0x8a, 0x57, 0x7f, 0x53, 0xfa, 0x2f, 0xaa, 0x6e, 0x66, 0x83, 0xfa, 0x33, 0xd1, 0x21, 0xab, 0x1b, 0x71, 0xb4, 0x7c, 0xda, 0xfd, 0xfb, 0x7f, 0x20, 0xab, 0x5e, 0xd5, 0xca, 0xfd, 0xdd, 0xe0, 0xee, 0xda, 0xba, 0xa8, 0x27, 0x99, 0x97, 0x69, 0xc1, 0x3c, 0x82, 0x8c, 0xa, 0x5c, 0x2d, 0x5b, 0x88, 0x3e, 0x34, 0x35, 0x86, 0x37, 0x46, 0x79, 0xe1, 0xaa, 0x19, 0xfb, 0xaa, 0xde, 0x15, 0x9, 0xd, 0x1a, 0x57, 0xff, 0xb5, 0xf, 0xf3, 0x2b, 0x5a, 0x6a, 0x4d, 0x19, 0x77, 0x71, 0x45, 0xdf, 0x4f, 0xb3, 0xec, 0xf1, 0xeb, 0x18, 0x53, 0x3e, 0x3b, 0x47, 0x8, 0x9a, 0x73, 0xa0, 0x5c, 0x8c, 0x5f, 0xeb, 0xf, 0x3a, 0xc2, 0x43, 0x67, 0xb4, 0x66, 0x67, 0x80, 0x58, 0xe, 0xc1, 0xec, 0x40, 0xd4, 0x22, 0x94, 0xca, 0xf9, 0xe8, 0x92, 0xe4, 0x69, 0x38, 0xbe, 0x67, 0x64, 0xca, 0x50, 0xc7, 0x6, 0x67, 0x42, 0x6e, 0xa3, 0xf0, 0xb7, 0x6c, 0xf2, 0xe8, 0x5f, 0xb1, 0xaf, 0xe7, 0xdb, 0xbb, 0x77, 0xb5, 0xf8, 0xcb, 0x8, 0xc4, 0x75, 0x7e, 0xc0, 0xf9, 0x1c, 0x7f, 0x3c, 0x89, 0x2f, 0xd2, 0x58, 0x3a, 0xe2, 0xf8, 0x91, 0xb6, 0x7b, 0x24, 0x27, 0xe9, 0xae, 0x84, 0x8b, 0xde, 0x74, 0xac, 0xfd, 0xd9, 0xb7, 0x69, 0x2a, 0xec, 0x32, 0x6f, 0xf0, 0x92, 0x84, 0xf1, 0x40, 0xc, 0x8a, 0xbc, 0x39, 0x6e, 0x2e, 0x73, 0xd4, 0x6e, 0x8a, 0x74, 0x2a, 0xdc, 0x60, 0x1f, 0xa3, 0x7, 0xde, 0x75, 0x8b, 0x74, 0xc8, 0xfe, 0x63, 0x75, 0xf6, 0x3d, 0x63, 0xac, 0x33, 0x89, 0xc3, 0xf0, 0xf8, 0x2d, 0x6b, 0xb4, 0x9e, 0x74, 0x8b, 0x5c, 0x33, 0xb4, 0xca, 0xa8, 0xe4, 0x99, 0xb6, 0x90, 0xa1, 0xef, 0xf, 0xd3, 0x61, 0xb2, 0xc6, 0x1a, 0x94, 0x7c, 0x44, 0x55, 0xf4, 0x45, 0xff, 0x9e, 0xa5, 0x5a, 0xc6, 0xa0, 0xe8, 0x2a, 0xc1, 0x8d, 0x6f, 0x34, 0x11, 0xb9, 0xbe, 0x4e, 0xd9, 0x87, 0x97, 0x73, 0xcf, 0x3d, 0x23, 
0xae, 0xd5, 0x1a, 0x5e, 0xae, 0x5d, 0x6a, 0x3, 0xf9, 0x22, 0xd, 0x10, 0xd9, 0x47, 0x69, 0x15, 0x3f, 0xee, 0x52, 0xa3, 0x8, 0xd2, 0x3c, 0x51, 0xf4, 0xf8, 0x9d, 0xe4, 0x98, 0x89, 0xc8, 0x67, 0x39, 0xd5, 0x5e, 0x35, 0x78, 0x27, 0xe8, 0x3c, 0x80, 0xae, 0x79, 0x71, 0xd2, 0x93, 0xf4, 0xaa, 0x51, 0x12, 0x1c, 0x4b, 0x1b, 0xe5, 0x6e, 0x15, 0x6f, 0xe4, 0xbb, 0x51, 0x9b, 0x45, 0x9f, 0xf9, 0xc4, 0x8c, 0x2a, 0xfb, 0x1a, 0xdf, 0x55, 0xd3, 0x48, 0x93, 0x27, 0x1, 0x26, 0xc2, 0x6b, 0x55, 0x6d, 0xa2, 0xfb, 0x84, 0x8b, 0xc9, 0x9e, 0x28, 0xc2, 0xef, 0x1a, 0x24, 0xec, 0x9b, 0xae, 0xbd, 0x60, 0xe9, 0x15, 0x35, 0xee, 0x42, 0xa4, 0x33, 0x5b, 0xfa, 0xf, 0xb6, 0xf7, 0x1, 0xa6, 0x2, 0x4c, 0xca, 0x90, 0x58, 0x3a, 0x96, 0x41, 0xe7, 0xcb, 0x9, 0x8c, 0xdb, 0x85, 0x4d, 0xa8, 0x89, 0xf3, 0xb5, 0x8e, 0xfd, 0x75, 0x5b, 0x4f, 0xed, 0xde, 0x3f, 0xeb, 0x38, 0xa3, 0xbe, 0xb0, 0x73, 0xfc, 0xb8, 0x54, 0xf7, 0x4c, 0x30, 0x67, 0x2e, 0x38, 0xa2, 0x54, 0x18, 0xba, 0x8, 0xbf, 0xf2, 0x39, 0xd5, 0xfe, 0xa5, 0x41, 0xc6, 0x66, 0x66, 0xba, 0x81, 0xef, 0x67, 0xe4, 0xe6, 0x3c, 0xc, 0xca, 0xa4, 0xa, 0x79, 0xb3, 0x57, 0x8b, 0x8a, 0x75, 0x98, 0x18, 0x42, 0x2f, 0x29, 0xa3, 0x82, 0xef, 0x9f, 0x86, 0x6, 0x23, 0xe1, 0x75, 0xfa, 0x8, 0xb1, 0xde, 0x17, 0x4a}, - }, - { - input: "testdata/huffman-rand-limit.in", - want: "testdata/huffman-rand-limit.%s.expect", - wantNoInput: "testdata/huffman-rand-limit.%s.expect-noinput", - tokens: []token{0x61, 0x51c00000, 0xa, 0xf8, 0x8b, 0x96, 0x76, 0x48, 0xa, 0x85, 0x94, 0x25, 0x80, 0xaf, 0xc2, 0xfe, 0x8d, 0xe8, 0x20, 0xeb, 0x17, 0x86, 0xc9, 0xb7, 0xc5, 0xde, 0x6, 0xea, 0x7d, 0x18, 0x8b, 0xe7, 0x3e, 0x7, 0xda, 0xdf, 0xff, 0x6c, 0x73, 0xde, 0xcc, 0xe7, 0x6d, 0x8d, 0x4, 0x19, 0x49, 0x7f, 0x47, 0x1f, 0x48, 0x15, 0xb0, 0xe8, 0x9e, 0xf2, 0x31, 0x59, 0xde, 0x34, 0xb4, 0x5b, 0xe5, 0xe0, 0x9, 0x11, 0x30, 0xc2, 0x88, 0x5b, 0x7c, 0x5d, 0x14, 0x13, 0x6f, 0x23, 0xa9, 0xa, 0xbc, 0x2d, 0x23, 0xbe, 0xd9, 0xed, 0x75, 0x4, 0x6c, 0x99, 0xdf, 0xfd, 0x70, 0x66, 0xe6, 0xee, 0xd9, 0xb1, 0x9e, 0x6e, 0x83, 0x59, 
0xd5, 0xd4, 0x80, 0x59, 0x98, 0x77, 0x89, 0x43, 0x38, 0xc9, 0xaf, 0x30, 0x32, 0x9a, 0x20, 0x1b, 0x46, 0x3d, 0x67, 0x6e, 0xd7, 0x72, 0x9e, 0x4e, 0x21, 0x4f, 0xc6, 0xe0, 0xd4, 0x7b, 0x4, 0x8d, 0xa5, 0x3, 0xf6, 0x5, 0x9b, 0x6b, 0xdc, 0x2a, 0x93, 0x77, 0x28, 0xfd, 0xb4, 0x62, 0xda, 0x20, 0xe7, 0x1f, 0xab, 0x6b, 0x51, 0x43, 0x39, 0x2f, 0xa0, 0x92, 0x1, 0x6c, 0x75, 0x3e, 0xf4, 0x35, 0xfd, 0x43, 0x2e, 0xf7, 0xa4, 0x75, 0xda, 0xea, 0x9b, 0xa}, - }, - { - input: "testdata/huffman-shifts.in", - want: "testdata/huffman-shifts.%s.expect", - wantNoInput: "testdata/huffman-shifts.%s.expect-noinput", - tokens: []token{0x31, 0x30, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x52400001, 0xd, 0xa, 0x32, 0x33, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7f400001}, - }, - { - input: "testdata/huffman-text-shift.in", - want: "testdata/huffman-text-shift.%s.expect", - wantNoInput: "testdata/huffman-text-shift.%s.expect-noinput", - tokens: []token{0x2f, 0x2f, 0x43, 0x6f, 0x70, 0x79, 0x72, 0x69, 0x67, 0x68, 0x74, 0x32, 0x30, 0x30, 0x39, 0x54, 0x68, 0x47, 0x6f, 0x41, 0x75, 0x74, 0x68, 0x6f, 0x72, 0x2e, 0x41, 0x6c, 0x6c, 0x40800016, 0x72, 0x72, 0x76, 0x64, 0x2e, 0xd, 0xa, 0x2f, 0x2f, 0x55, 0x6f, 0x66, 0x74, 0x68, 0x69, 0x6f, 0x75, 0x72, 0x63, 0x63, 0x6f, 0x64, 0x69, 0x67, 0x6f, 0x76, 0x72, 0x6e, 0x64, 0x62, 0x79, 0x42, 0x53, 0x44, 0x2d, 0x74, 0x79, 0x6c, 0x40400020, 0x6c, 0x69, 0x63, 0x6e, 0x74, 0x68, 0x74, 0x63, 0x6e, 0x62, 0x66, 0x6f, 0x75, 0x6e, 0x64, 0x69, 0x6e, 0x74, 0x68, 0x4c, 0x49, 0x43, 0x45, 0x4e, 0x53, 0x45, 0x66, 0x69, 0x6c, 0x2e, 0xd, 0xa, 0xd, 0xa, 0x70, 0x63, 0x6b, 0x67, 0x6d, 0x69, 0x6e, 0x4040000a, 0x69, 0x6d, 0x70, 0x6f, 0x72, 0x74, 0x22, 0x6f, 0x22, 0x4040000c, 0x66, 0x75, 0x6e, 0x63, 0x6d, 0x69, 0x6e, 0x28, 0x29, 0x7b, 0xd, 0xa, 0x9, 0x76, 0x72, 0x62, 0x3d, 0x6d, 0x6b, 
0x28, 0x5b, 0x5d, 0x62, 0x79, 0x74, 0x2c, 0x36, 0x35, 0x35, 0x33, 0x35, 0x29, 0xd, 0xa, 0x9, 0x66, 0x2c, 0x5f, 0x3a, 0x3d, 0x6f, 0x2e, 0x43, 0x72, 0x74, 0x28, 0x22, 0x68, 0x75, 0x66, 0x66, 0x6d, 0x6e, 0x2d, 0x6e, 0x75, 0x6c, 0x6c, 0x2d, 0x6d, 0x78, 0x2e, 0x69, 0x6e, 0x22, 0x40800021, 0x2e, 0x57, 0x72, 0x69, 0x74, 0x28, 0x62, 0x29, 0xd, 0xa, 0x7d, 0xd, 0xa, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x58, 0x78, 0x79, 0x7a, 0x21, 0x22, 0x23, 0xc2, 0xa4, 0x25, 0x26, 0x2f, 0x3f, 0x22}, - }, - { - input: "testdata/huffman-text.in", - want: "testdata/huffman-text.%s.expect", - wantNoInput: "testdata/huffman-text.%s.expect-noinput", - tokens: []token{0x2f, 0x2f, 0x20, 0x43, 0x6f, 0x70, 0x79, 0x72, 0x69, 0x67, 0x68, 0x74, 0x20, 0x32, 0x30, 0x30, 0x39, 0x20, 0x54, 0x68, 0x65, 0x20, 0x47, 0x6f, 0x20, 0x41, 0x75, 0x74, 0x68, 0x6f, 0x72, 0x73, 0x2e, 0x20, 0x41, 0x6c, 0x6c, 0x20, 0x4080001e, 0x73, 0x20, 0x72, 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, 0x64, 0x2e, 0xd, 0xa, 0x2f, 0x2f, 0x20, 0x55, 0x73, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x74, 0x68, 0x69, 0x73, 0x20, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x20, 0x63, 0x6f, 0x64, 0x65, 0x20, 0x69, 0x73, 0x20, 0x67, 0x6f, 0x76, 0x65, 0x72, 0x6e, 0x65, 0x64, 0x20, 0x62, 0x79, 0x20, 0x61, 0x20, 0x42, 0x53, 0x44, 0x2d, 0x73, 0x74, 0x79, 0x6c, 0x65, 0x40800036, 0x6c, 0x69, 0x63, 0x65, 0x6e, 0x73, 0x65, 0x20, 0x74, 0x68, 0x61, 0x74, 0x20, 0x63, 0x61, 0x6e, 0x20, 0x62, 0x65, 0x20, 0x66, 0x6f, 0x75, 0x6e, 0x64, 0x20, 0x69, 0x6e, 0x20, 0x74, 0x68, 0x65, 0x20, 0x4c, 0x49, 0x43, 0x45, 0x4e, 0x53, 0x45, 0x20, 0x66, 0x69, 0x6c, 0x65, 0x2e, 0xd, 0xa, 0xd, 0xa, 0x70, 0x61, 0x63, 0x6b, 0x61, 0x67, 0x65, 0x20, 0x6d, 0x61, 0x69, 0x6e, 0x4040000f, 0x69, 0x6d, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x22, 0x6f, 0x73, 0x22, 0x4040000e, 0x66, 0x75, 0x6e, 0x63, 0x4080001b, 0x28, 0x29, 0x20, 0x7b, 0xd, 0xa, 0x9, 0x76, 0x61, 0x72, 0x20, 0x62, 0x20, 0x3d, 0x20, 0x6d, 0x61, 0x6b, 0x65, 0x28, 
0x5b, 0x5d, 0x62, 0x79, 0x74, 0x65, 0x2c, 0x20, 0x36, 0x35, 0x35, 0x33, 0x35, 0x29, 0xd, 0xa, 0x9, 0x66, 0x2c, 0x20, 0x5f, 0x20, 0x3a, 0x3d, 0x20, 0x6f, 0x73, 0x2e, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x28, 0x22, 0x68, 0x75, 0x66, 0x66, 0x6d, 0x61, 0x6e, 0x2d, 0x6e, 0x75, 0x6c, 0x6c, 0x2d, 0x6d, 0x61, 0x78, 0x2e, 0x69, 0x6e, 0x22, 0x4080002a, 0x2e, 0x57, 0x72, 0x69, 0x74, 0x65, 0x28, 0x62, 0x29, 0xd, 0xa, 0x7d, 0xd, 0xa}, - }, - { - input: "testdata/huffman-zero.in", - want: "testdata/huffman-zero.%s.expect", - wantNoInput: "testdata/huffman-zero.%s.expect-noinput", - tokens: []token{0x30, ml, 0x4b800000}, - }, - { - input: "", - want: "", - wantNoInput: "testdata/null-long-match.%s.expect-noinput", - tokens: []token{0x0, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, 
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, 0x41400000}, - }, -} - -// TestWriteBlock tests if the writeBlock encoding has changed. -// To update the reference files use the "-update" flag on the test. 
-func TestWriteBlock(t *testing.T) { - for _, test := range writeBlockTests { - testBlock(t, test, "wb") - } -} - -// TestWriteBlockDynamic tests if the writeBlockDynamic encoding has changed. -// To update the reference files use the "-update" flag on the test. -func TestWriteBlockDynamic(t *testing.T) { - for _, test := range writeBlockTests { - testBlock(t, test, "dyn") - } -} - -// TestWriteBlockDynamic tests if the writeBlockDynamic encoding has changed. -// To update the reference files use the "-update" flag on the test. -func TestWriteBlockDynamicSync(t *testing.T) { - for _, test := range writeBlockTests { - testBlock(t, test, "sync") - } -} - -// testBlock tests a block against its references, -// or regenerate the references, if "-update" flag is set. -func testBlock(t *testing.T, test huffTest, ttype string) { - if test.want != "" { - test.want = fmt.Sprintf(test.want, ttype) - } - const gotSuffix = ".got" - test.wantNoInput = fmt.Sprintf(test.wantNoInput, ttype) - tokens := indexTokens(test.tokens) - if *update { - if test.input != "" { - t.Logf("Updating %q", test.want) - input, err := os.ReadFile(test.input) - if err != nil { - t.Error(err) - return - } - - f, err := os.Create(test.want) - if err != nil { - t.Error(err) - return - } - defer f.Close() - bw := newHuffmanBitWriter(f) - writeToType(t, ttype, bw, tokens, input) - } - - t.Logf("Updating %q", test.wantNoInput) - f, err := os.Create(test.wantNoInput) - if err != nil { - t.Error(err) - return - } - defer f.Close() - bw := newHuffmanBitWriter(f) - writeToType(t, ttype, bw, tokens, nil) - return - } - - if test.input != "" { - t.Logf("Testing %q", test.want) - input, err := os.ReadFile(test.input) - if err != nil { - t.Error(err) - return - } - want, err := os.ReadFile(test.want) - if err != nil { - t.Error(err) - return - } - var buf bytes.Buffer - bw := newHuffmanBitWriter(&buf) - writeToType(t, ttype, bw, tokens, input) - - got := buf.Bytes() - if !bytes.Equal(got, want) { - 
t.Errorf("writeBlock did not yield expected result for file %q with input. See %q", test.want, test.want+gotSuffix) - if err := os.WriteFile(test.want+gotSuffix, got, 0o666); err != nil { - t.Error(err) - } - } - t.Log("Output ok") - - // Test if the writer produces the same output after reset. - buf.Reset() - bw.reset(&buf) - writeToType(t, ttype, bw, tokens, input) - bw.flush() - got = buf.Bytes() - if !bytes.Equal(got, want) { - t.Errorf("reset: writeBlock did not yield expected result for file %q with input. See %q", test.want, test.want+".reset"+gotSuffix) - if err := os.WriteFile(test.want+".reset"+gotSuffix, got, 0o666); err != nil { - t.Error(err) - } - return - } - t.Log("Reset ok") - testWriterEOF(t, "wb", test, true) - } - t.Logf("Testing %q", test.wantNoInput) - wantNI, err := os.ReadFile(test.wantNoInput) - if err != nil { - t.Error(err) - return - } - var buf bytes.Buffer - bw := newHuffmanBitWriter(&buf) - writeToType(t, ttype, bw, tokens, nil) - - got := buf.Bytes() - if !bytes.Equal(got, wantNI) { - t.Errorf("writeBlock did not yield expected result for file %q with input. See %q", test.wantNoInput, test.wantNoInput+gotSuffix) - if err := os.WriteFile(test.wantNoInput+gotSuffix, got, 0o666); err != nil { - t.Error(err) - } - } else if got[0]&1 == 1 { - t.Error("got unexpected EOF") - return - } - - t.Log("Output ok") - - // Test if the writer produces the same output after reset. - buf.Reset() - bw.reset(&buf) - writeToType(t, ttype, bw, tokens, nil) - bw.flush() - got = buf.Bytes() - if !bytes.Equal(got, wantNI) { - t.Errorf("reset: writeBlock did not yield expected result for file %q without input. 
See %q", test.wantNoInput, test.wantNoInput+".reset"+gotSuffix) - if err := os.WriteFile(test.wantNoInput+".reset"+gotSuffix, got, 0o666); err != nil { - t.Error(err) - } - return - } - t.Log("Reset ok") - testWriterEOF(t, "wb", test, false) -} - -func writeToType(t *testing.T, ttype string, bw *huffmanBitWriter, tok tokens, input []byte) { - switch ttype { - case "wb": - bw.writeBlock(&tok, false, input) - case "dyn": - bw.writeBlockDynamic(&tok, false, input, false) - case "sync": - bw.writeBlockDynamic(&tok, false, input, true) - default: - panic("unknown test type") - } - - if bw.err != nil { - t.Error(bw.err) - return - } - - bw.flush() - if bw.err != nil { - t.Error(bw.err) - return - } -} - -// testWriterEOF tests if the written block contains an EOF marker. -func testWriterEOF(t *testing.T, ttype string, test huffTest, useInput bool) { - if useInput && test.input == "" { - return - } - var input []byte - if useInput { - var err error - input, err = os.ReadFile(test.input) - if err != nil { - t.Error(err) - return - } - } - var buf bytes.Buffer - bw := newHuffmanBitWriter(&buf) - tokens := indexTokens(test.tokens) - switch ttype { - case "wb": - bw.writeBlock(&tokens, true, input) - case "dyn": - bw.writeBlockDynamic(&tokens, true, input, true) - case "huff": - bw.writeBlockHuff(true, input, true) - default: - panic("unknown test type") - } - if bw.err != nil { - t.Error(bw.err) - return - } - - bw.flush() - if bw.err != nil { - t.Error(bw.err) - return - } - b := buf.Bytes() - if len(b) == 0 { - t.Error("no output received") - return - } - if b[0]&1 != 1 { - t.Errorf("block not marked with EOF for input %q", test.input) - return - } - t.Log("EOF ok") -} diff --git a/internal/compress/flate/huffman_code.go b/internal/compress/flate/huffman_code.go deleted file mode 100644 index 42da87e8..00000000 --- a/internal/compress/flate/huffman_code.go +++ /dev/null @@ -1,419 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. 
-// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package flate - -import ( - "math" - "math/bits" -) - -const ( - maxBitsLimit = 16 - // number of valid literals - literalCount = 286 -) - -// hcode is a huffman code with a bit code and bit length. -type hcode uint32 - -func (h hcode) len() uint8 { - return uint8(h) -} - -func (h hcode) code64() uint64 { - return uint64(h >> 8) -} - -func (h hcode) zero() bool { - return h == 0 -} - -type huffmanEncoder struct { - codes []hcode - bitCount [17]int32 - - // Allocate a reusable buffer with the longest possible frequency table. - // Possible lengths are codegenCodeCount, offsetCodeCount and literalCount. - // The largest of these is literalCount, so we allocate for that case. - freqcache [literalCount + 1]literalNode -} - -type literalNode struct { - literal uint16 - freq uint16 -} - -// A levelInfo describes the state of the constructed tree for a given depth. -type levelInfo struct { - // Our level. for better printing - level int32 - - // The frequency of the last node at this level - lastFreq int32 - - // The frequency of the next character to add to this level - nextCharFreq int32 - - // The frequency of the next pair (from level below) to add to this level. - // Only valid if the "needed" value of the next lower level is 0. - nextPairFreq int32 - - // The number of chains remaining to generate for this level before moving - // up to the next level - needed int32 -} - -// set sets the code and length of an hcode. 
-func (h *hcode) set(code uint16, length uint8) { - *h = hcode(length) | (hcode(code) << 8) -} - -func newhcode(code uint16, length uint8) hcode { - return hcode(length) | (hcode(code) << 8) -} - -func reverseBits(number uint16, bitLength byte) uint16 { - return bits.Reverse16(number << ((16 - bitLength) & 15)) -} - -func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxUint16} } - -func newHuffmanEncoder(size int) *huffmanEncoder { - // Make capacity to next power of two. - c := uint(bits.Len32(uint32(size - 1))) - return &huffmanEncoder{codes: make([]hcode, size, 1<<c)} -} - -// Generates a HuffmanCode corresponding to the fixed literal table -func generateFixedLiteralEncoding() *huffmanEncoder { - h := newHuffmanEncoder(literalCount) - codes := h.codes - var ch uint16 - for ch = range uint16(literalCount) { - var bits uint16 - var size uint8 - switch { - case ch < 144: - // size 8, 000110000 .. 10111111 - bits = ch + 48 - size = 8 - case ch < 256: - // size 9, 110010000 .. 111111111 - bits = ch + 400 - 144 - size = 9 - case ch < 280: - // size 7, 0000000 .. 0010111 - bits = ch - 256 - size = 7 - default: - // size 8, 11000000 .. 
11000111 - bits = ch + 192 - 280 - size = 8 - } - codes[ch] = newhcode(reverseBits(bits, size), size) - } - return h -} - -func generateFixedOffsetEncoding() *huffmanEncoder { - h := newHuffmanEncoder(30) - codes := h.codes - for ch := range codes { - codes[ch] = newhcode(reverseBits(uint16(ch), 5), 5) - } - return h -} - -var ( - fixedLiteralEncoding = generateFixedLiteralEncoding() - fixedOffsetEncoding = generateFixedOffsetEncoding() -) - -func (h *huffmanEncoder) bitLength(freq []uint16) int { - var total int - for i, f := range freq { - if f != 0 { - total += int(f) * int(h.codes[i].len()) - } - } - return total -} - -func (h *huffmanEncoder) bitLengthRaw(b []byte) int { - var total int - for _, f := range b { - total += int(h.codes[f].len()) - } - return total -} - -// canReuseBits returns the number of bits or math.MaxInt32 if the encoder cannot be reused. -func (h *huffmanEncoder) canReuseBits(freq []uint16) int { - var total int - for i, f := range freq { - if f != 0 { - code := h.codes[i] - if code.zero() { - return math.MaxInt32 - } - total += int(f) * int(code.len()) - } - } - return total -} - -// Return the number of literals assigned to each bit size in the Huffman encoding -// -// This method is only called when list.length >= 3 -// The cases of 0, 1, and 2 literals are handled by special case code. -// -// list An array of the literals with non-zero frequencies -// -// and their associated frequencies. The array is in order of increasing -// frequency, and has as its last element a special element with frequency -// MaxInt32 -// -// maxBits The maximum number of bits that should be used to encode any literal. -// -// Must be less than 16. -// -// return An integer array in which array[i] indicates the number of literals -// -// that should be encoded in i bits. 
-func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 { - if maxBits >= maxBitsLimit { - panic("flate: maxBits too large") - } - n := int32(len(list)) - list = list[0 : n+1] - list[n] = maxNode() - - // The tree can't have greater depth than n - 1, no matter what. This - // saves a little bit of work in some small cases - if maxBits > n-1 { - maxBits = n - 1 - } - - // Create information about each of the levels. - // A bogus "Level 0" whose sole purpose is so that - // level1.prev.needed==0. This makes level1.nextPairFreq - // be a legitimate value that never gets chosen. - var levels [maxBitsLimit]levelInfo - // leafCounts[i] counts the number of literals at the left - // of ancestors of the rightmost node at level i. - // leafCounts[i][j] is the number of literals at the left - // of the level j ancestor. - var leafCounts [maxBitsLimit][maxBitsLimit]int32 - - // Descending to only have 1 bounds check. - l2f := int32(list[2].freq) - l1f := int32(list[1].freq) - l0f := int32(list[0].freq) + int32(list[1].freq) - - for level := int32(1); level <= maxBits; level++ { - // For every level, the first two items are the first two characters. - // We initialize the levels as if we had already figured this out. - levels[level] = levelInfo{ - level: level, - lastFreq: l1f, - nextCharFreq: l2f, - nextPairFreq: l0f, - } - leafCounts[level][level] = 2 - if level == 1 { - levels[level].nextPairFreq = math.MaxInt32 - } - } - - // We need a total of 2*n - 2 items at top level and have already generated 2. - levels[maxBits].needed = 2*n - 4 - - level := uint32(maxBits) - for level < 16 { - l := &levels[level] - if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 { - // We've run out of both leafs and pairs. - // End all calculations for this level. - // To make sure we never come back to this level or any lower level, - // set nextPairFreq impossibly large. 
- l.needed = 0 - levels[level+1].nextPairFreq = math.MaxInt32 - level++ - continue - } - - prevFreq := l.lastFreq - if l.nextCharFreq < l.nextPairFreq { - // The next item on this row is a leaf node. - n := leafCounts[level][level] + 1 - l.lastFreq = l.nextCharFreq - // Lower leafCounts are the same of the previous node. - leafCounts[level][level] = n - e := list[n] - if e.literal < math.MaxUint16 { - l.nextCharFreq = int32(e.freq) - } else { - l.nextCharFreq = math.MaxInt32 - } - } else { - // The next item on this row is a pair from the previous row. - // nextPairFreq isn't valid until we generate two - // more values in the level below - l.lastFreq = l.nextPairFreq - // Take leaf counts from the lower level, except counts[level] remains the same. - if true { - save := leafCounts[level][level] - leafCounts[level] = leafCounts[level-1] - leafCounts[level][level] = save - } else { - copy(leafCounts[level][:level], leafCounts[level-1][:level]) - } - levels[l.level-1].needed = 2 - } - - if l.needed--; l.needed == 0 { - // We've done everything we need to do for this level. - // Continue calculating one level up. Fill in nextPairFreq - // of that level with the sum of the two nodes we've just calculated on - // this level. - if l.level == maxBits { - // All done! - break - } - levels[l.level+1].nextPairFreq = prevFreq + l.lastFreq - level++ - } else { - // If we stole from below, move down temporarily to replenish it. - for levels[level-1].needed > 0 { - level-- - } - } - } - - // Somethings is wrong if at the end, the top level is null or hasn't used - // all of the leaves. - if leafCounts[maxBits][maxBits] != n { - panic("leafCounts[maxBits][maxBits] != n") - } - - bitCount := h.bitCount[:maxBits+1] - bits := 1 - counts := &leafCounts[maxBits] - for level := maxBits; level > 0; level-- { - // chain.leafCount gives the number of literals requiring at least "bits" - // bits to encode. 
- bitCount[bits] = counts[level] - counts[level-1] - bits++ - } - return bitCount -} - -// Look at the leaves and assign them a bit count and an encoding as specified -// in RFC 1951 3.2.2 -func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) { - code := uint16(0) - for n, bits := range bitCount { - code <<= 1 - if n == 0 || bits == 0 { - continue - } - // The literals list[len(list)-bits] .. list[len(list)-bits] - // are encoded using "bits" bits, and get the values - // code, code + 1, .... The code values are - // assigned in literal order (not frequency order). - chunk := list[len(list)-int(bits):] - - sortByLiteral(chunk) - for _, node := range chunk { - h.codes[node.literal] = newhcode(reverseBits(code, uint8(n)), uint8(n)) - code++ - } - list = list[0 : len(list)-int(bits)] - } -} - -// Update this Huffman Code object to be the minimum code for the specified frequency count. -// -// freq An array of frequencies, in which frequency[i] gives the frequency of literal i. -// maxBits The maximum number of bits to use for any literal. -func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) { - list := h.freqcache[:len(freq)+1] - codes := h.codes[:len(freq)] - // Number of non-zero literals - count := 0 - // Set list to be the set of all non-zero literals and their frequencies - for i, f := range freq { - if f != 0 { - list[count] = literalNode{uint16(i), f} - count++ - } else { - codes[i] = 0 - } - } - list[count] = literalNode{} - - list = list[:count] - if count <= 2 { - // Handle the small cases here, because they are awkward for the general case code. With - // two or fewer literals, everything has bit length 1. - for i, node := range list { - // "list" is in order of increasing literal value. 
- h.codes[node.literal].set(uint16(i), 1) - } - return - } - sortByFreq(list) - - // Get the number of literals for each bit count - bitCount := h.bitCounts(list, maxBits) - // And do the assignment - h.assignEncodingAndSize(bitCount, list) -} - -// atLeastOne clamps the result between 1 and 15. -func atLeastOne(v float32) float32 { - if v < 1 { - return 1 - } - if v > 15 { - return 15 - } - return v -} - -func histogram(b []byte, h []uint16) { - if true && len(b) >= 8<<10 { - // Split for bigger inputs - histogramSplit(b, h) - } else { - h = h[:256] - for _, t := range b { - h[t]++ - } - } -} - -func histogramSplit(b []byte, h []uint16) { - // Tested, and slightly faster than 2-way. - // Writing to separate arrays and combining is also slightly slower. - h = h[:256] - for len(b)&3 != 0 { - h[b[0]]++ - b = b[1:] - } - n := len(b) / 4 - x, y, z, w := b[:n], b[n:], b[n+n:], b[n+n+n:] - y, z, w = y[:len(x)], z[:len(x)], w[:len(x)] - for i, t := range x { - v0 := &h[t] - v1 := &h[y[i]] - v3 := &h[w[i]] - v2 := &h[z[i]] - *v0++ - *v1++ - *v2++ - *v3++ - } -} diff --git a/internal/compress/flate/huffman_sortByFreq.go b/internal/compress/flate/huffman_sortByFreq.go deleted file mode 100644 index 6c05ba8c..00000000 --- a/internal/compress/flate/huffman_sortByFreq.go +++ /dev/null @@ -1,159 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package flate - -// Sort sorts data. -// It makes one call to data.Len to determine n, and O(n*log(n)) calls to -// data.Less and data.Swap. The sort is not guaranteed to be stable. 
-func sortByFreq(data []literalNode) { - n := len(data) - quickSortByFreq(data, 0, n, maxDepth(n)) -} - -func quickSortByFreq(data []literalNode, a, b, maxDepth int) { - for b-a > 12 { // Use ShellSort for slices <= 12 elements - if maxDepth == 0 { - heapSort(data, a, b) - return - } - maxDepth-- - mlo, mhi := doPivotByFreq(data, a, b) - // Avoiding recursion on the larger subproblem guarantees - // a stack depth of at most lg(b-a). - if mlo-a < b-mhi { - quickSortByFreq(data, a, mlo, maxDepth) - a = mhi // i.e., quickSortByFreq(data, mhi, b) - } else { - quickSortByFreq(data, mhi, b, maxDepth) - b = mlo // i.e., quickSortByFreq(data, a, mlo) - } - } - if b-a > 1 { - // Do ShellSort pass with gap 6 - // It could be written in this simplified form cause b-a <= 12 - for i := a + 6; i < b; i++ { - if data[i].freq == data[i-6].freq && data[i].literal < data[i-6].literal || data[i].freq < data[i-6].freq { - data[i], data[i-6] = data[i-6], data[i] - } - } - insertionSortByFreq(data, a, b) - } -} - -func doPivotByFreq(data []literalNode, lo, hi int) (midlo, midhi int) { - m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow. - if hi-lo > 40 { - // Tukey's ``Ninther,'' median of three medians of three. 
- s := (hi - lo) / 8 - medianOfThreeSortByFreq(data, lo, lo+s, lo+2*s) - medianOfThreeSortByFreq(data, m, m-s, m+s) - medianOfThreeSortByFreq(data, hi-1, hi-1-s, hi-1-2*s) - } - medianOfThreeSortByFreq(data, lo, m, hi-1) - - // Invariants are: - // data[lo] = pivot (set up by ChoosePivot) - // data[lo < i < a] < pivot - // data[a <= i < b] <= pivot - // data[b <= i < c] unexamined - // data[c <= i < hi-1] > pivot - // data[hi-1] >= pivot - pivot := lo - a, c := lo+1, hi-1 - - for ; a < c && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { - } - b := a - for { - for ; b < c && (data[pivot].freq == data[b].freq && data[pivot].literal > data[b].literal || data[pivot].freq > data[b].freq); b++ { // data[b] <= pivot - } - for ; b < c && (data[pivot].freq == data[c-1].freq && data[pivot].literal < data[c-1].literal || data[pivot].freq < data[c-1].freq); c-- { // data[c-1] > pivot - } - if b >= c { - break - } - // data[b] > pivot; data[c-1] <= pivot - data[b], data[c-1] = data[c-1], data[b] - b++ - c-- - } - // If hi-c<3 then there are duplicates (by property of median of nine). - // Let's be a bit more conservative, and set border to 5. 
- protect := hi-c < 5 - if !protect && hi-c < (hi-lo)/4 { - // Lets test some points for equality to pivot - dups := 0 - if data[pivot].freq == data[hi-1].freq && data[pivot].literal > data[hi-1].literal || data[pivot].freq > data[hi-1].freq { // data[hi-1] = pivot - data[c], data[hi-1] = data[hi-1], data[c] - c++ - dups++ - } - if data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq { // data[b-1] = pivot - b-- - dups++ - } - // m-lo = (hi-lo)/2 > 6 - // b-lo > (hi-lo)*3/4-1 > 8 - // ==> m < b ==> data[m] <= pivot - if data[m].freq == data[pivot].freq && data[m].literal > data[pivot].literal || data[m].freq > data[pivot].freq { // data[m] = pivot - data[m], data[b-1] = data[b-1], data[m] - b-- - dups++ - } - // if at least 2 points are equal to pivot, assume skewed distribution - protect = dups > 1 - } - if protect { - // Protect against a lot of duplicates - // Add invariant: - // data[a <= i < b] unexamined - // data[b <= i < c] = pivot - for { - for ; a < b && (data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq); b-- { // data[b] == pivot - } - for ; a < b && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { // data[a] < pivot - } - if a >= b { - break - } - // data[a] == pivot; data[b-1] < pivot - data[a], data[b-1] = data[b-1], data[a] - a++ - b-- - } - } - // Swap pivot into middle - data[pivot], data[b-1] = data[b-1], data[pivot] - return b - 1, c -} - -// Insertion sort -func insertionSortByFreq(data []literalNode, a, b int) { - for i := a + 1; i < b; i++ { - for j := i; j > a && (data[j].freq == data[j-1].freq && data[j].literal < data[j-1].literal || data[j].freq < data[j-1].freq); j-- { - data[j], data[j-1] = data[j-1], data[j] - } - } -} - -// quickSortByFreq, loosely following Bentley and McIlroy, -// ``Engineering a Sort Function,'' SP&E November 1993. 
- -// medianOfThreeSortByFreq moves the median of the three values data[m0], data[m1], data[m2] into data[m1]. -func medianOfThreeSortByFreq(data []literalNode, m1, m0, m2 int) { - // sort 3 elements - if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq { - data[m1], data[m0] = data[m0], data[m1] - } - // data[m0] <= data[m1] - if data[m2].freq == data[m1].freq && data[m2].literal < data[m1].literal || data[m2].freq < data[m1].freq { - data[m2], data[m1] = data[m1], data[m2] - // data[m0] <= data[m2] && data[m1] < data[m2] - if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq { - data[m1], data[m0] = data[m0], data[m1] - } - } - // now data[m0] <= data[m1] <= data[m2] -} diff --git a/internal/compress/flate/huffman_sortByLiteral.go b/internal/compress/flate/huffman_sortByLiteral.go deleted file mode 100644 index f6d0a404..00000000 --- a/internal/compress/flate/huffman_sortByLiteral.go +++ /dev/null @@ -1,203 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package flate - -// Sort sorts data. -// It makes one call to data.Len to determine n, and O(n*log(n)) calls to -// data.Less and data.Swap. The sort is not guaranteed to be stable. -func sortByLiteral(data []literalNode) { - n := len(data) - quickSort(data, 0, n, maxDepth(n)) -} - -func quickSort(data []literalNode, a, b, maxDepth int) { - for b-a > 12 { // Use ShellSort for slices <= 12 elements - if maxDepth == 0 { - heapSort(data, a, b) - return - } - maxDepth-- - mlo, mhi := doPivot(data, a, b) - // Avoiding recursion on the larger subproblem guarantees - // a stack depth of at most lg(b-a). 
- if mlo-a < b-mhi { - quickSort(data, a, mlo, maxDepth) - a = mhi // i.e., quickSort(data, mhi, b) - } else { - quickSort(data, mhi, b, maxDepth) - b = mlo // i.e., quickSort(data, a, mlo) - } - } - if b-a > 1 { - // Do ShellSort pass with gap 6 - // It could be written in this simplified form cause b-a <= 12 - for i := a + 6; i < b; i++ { - if data[i].literal < data[i-6].literal { - data[i], data[i-6] = data[i-6], data[i] - } - } - insertionSort(data, a, b) - } -} - -func heapSort(data []literalNode, a, b int) { - first := a - lo := 0 - hi := b - a - - // Build heap with greatest element at top. - for i := (hi - 1) / 2; i >= 0; i-- { - siftDown(data, i, hi, first) - } - - // Pop elements, largest first, into end of data. - for i := hi - 1; i >= 0; i-- { - data[first], data[first+i] = data[first+i], data[first] - siftDown(data, lo, i, first) - } -} - -// siftDown implements the heap property on data[lo, hi). -// first is an offset into the array where the root of the heap lies. -func siftDown(data []literalNode, lo, hi, first int) { - root := lo - for { - child := 2*root + 1 - if child >= hi { - break - } - if child+1 < hi && data[first+child].literal < data[first+child+1].literal { - child++ - } - if data[first+root].literal > data[first+child].literal { - return - } - data[first+root], data[first+child] = data[first+child], data[first+root] - root = child - } -} - -func doPivot(data []literalNode, lo, hi int) (midlo, midhi int) { - m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow. - if hi-lo > 40 { - // Tukey's ``Ninther,'' median of three medians of three. 
- s := (hi - lo) / 8 - medianOfThree(data, lo, lo+s, lo+2*s) - medianOfThree(data, m, m-s, m+s) - medianOfThree(data, hi-1, hi-1-s, hi-1-2*s) - } - medianOfThree(data, lo, m, hi-1) - - // Invariants are: - // data[lo] = pivot (set up by ChoosePivot) - // data[lo < i < a] < pivot - // data[a <= i < b] <= pivot - // data[b <= i < c] unexamined - // data[c <= i < hi-1] > pivot - // data[hi-1] >= pivot - pivot := lo - a, c := lo+1, hi-1 - - for ; a < c && data[a].literal < data[pivot].literal; a++ { - } - b := a - for { - for ; b < c && data[pivot].literal > data[b].literal; b++ { // data[b] <= pivot - } - for ; b < c && data[pivot].literal < data[c-1].literal; c-- { // data[c-1] > pivot - } - if b >= c { - break - } - // data[b] > pivot; data[c-1] <= pivot - data[b], data[c-1] = data[c-1], data[b] - b++ - c-- - } - // If hi-c<3 then there are duplicates (by property of median of nine). - // Let's be a bit more conservative, and set border to 5. - protect := hi-c < 5 - if !protect && hi-c < (hi-lo)/4 { - // Lets test some points for equality to pivot - dups := 0 - if data[pivot].literal > data[hi-1].literal { // data[hi-1] = pivot - data[c], data[hi-1] = data[hi-1], data[c] - c++ - dups++ - } - if data[b-1].literal > data[pivot].literal { // data[b-1] = pivot - b-- - dups++ - } - // m-lo = (hi-lo)/2 > 6 - // b-lo > (hi-lo)*3/4-1 > 8 - // ==> m < b ==> data[m] <= pivot - if data[m].literal > data[pivot].literal { // data[m] = pivot - data[m], data[b-1] = data[b-1], data[m] - b-- - dups++ - } - // if at least 2 points are equal to pivot, assume skewed distribution - protect = dups > 1 - } - if protect { - // Protect against a lot of duplicates - // Add invariant: - // data[a <= i < b] unexamined - // data[b <= i < c] = pivot - for { - for ; a < b && data[b-1].literal > data[pivot].literal; b-- { // data[b] == pivot - } - for ; a < b && data[a].literal < data[pivot].literal; a++ { // data[a] < pivot - } - if a >= b { - break - } - // data[a] == pivot; data[b-1] < pivot - 
data[a], data[b-1] = data[b-1], data[a] - a++ - b-- - } - } - // Swap pivot into middle - data[pivot], data[b-1] = data[b-1], data[pivot] - return b - 1, c -} - -// Insertion sort -func insertionSort(data []literalNode, a, b int) { - for i := a + 1; i < b; i++ { - for j := i; j > a && data[j].literal < data[j-1].literal; j-- { - data[j], data[j-1] = data[j-1], data[j] - } - } -} - -// maxDepth returns a threshold at which quicksort should switch -// to heapsort. It returns 2*ceil(lg(n+1)). -func maxDepth(n int) int { - var depth int - for i := n; i > 0; i >>= 1 { - depth++ - } - return depth * 2 -} - -// medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1]. -func medianOfThree(data []literalNode, m1, m0, m2 int) { - // sort 3 elements - if data[m1].literal < data[m0].literal { - data[m1], data[m0] = data[m0], data[m1] - } - // data[m0] <= data[m1] - if data[m2].literal < data[m1].literal { - data[m2], data[m1] = data[m1], data[m2] - // data[m0] <= data[m2] && data[m1] < data[m2] - if data[m1].literal < data[m0].literal { - data[m1], data[m0] = data[m0], data[m1] - } - } - // now data[m0] <= data[m1] <= data[m2] -} diff --git a/internal/compress/flate/inflate.go b/internal/compress/flate/inflate.go deleted file mode 100644 index f12f1e77..00000000 --- a/internal/compress/flate/inflate.go +++ /dev/null @@ -1,867 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package flate implements the DEFLATE compressed data format, described in -// RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file -// formats. 
-package flate - -import ( - "bufio" - "compress/flate" - "fmt" - "io" - "math/bits" - "sync" -) - -const ( - maxCodeLen = 16 // max length of Huffman code - maxCodeLenMask = 15 // mask for max length of Huffman code - // The next three numbers come from the RFC section 3.2.7, with the - // additional proviso in section 3.2.5 which implies that distance codes - // 30 and 31 should never occur in compressed data. - maxNumLit = 286 - maxNumDist = 30 - numCodes = 19 // number of codes in Huffman meta-code - - debugDecode = false -) - -// Value of length - 3 and extra bits. -type lengthExtra struct { - length, extra uint8 -} - -var decCodeToLen = [32]lengthExtra{{length: 0x0, extra: 0x0}, {length: 0x1, extra: 0x0}, {length: 0x2, extra: 0x0}, {length: 0x3, extra: 0x0}, {length: 0x4, extra: 0x0}, {length: 0x5, extra: 0x0}, {length: 0x6, extra: 0x0}, {length: 0x7, extra: 0x0}, {length: 0x8, extra: 0x1}, {length: 0xa, extra: 0x1}, {length: 0xc, extra: 0x1}, {length: 0xe, extra: 0x1}, {length: 0x10, extra: 0x2}, {length: 0x14, extra: 0x2}, {length: 0x18, extra: 0x2}, {length: 0x1c, extra: 0x2}, {length: 0x20, extra: 0x3}, {length: 0x28, extra: 0x3}, {length: 0x30, extra: 0x3}, {length: 0x38, extra: 0x3}, {length: 0x40, extra: 0x4}, {length: 0x50, extra: 0x4}, {length: 0x60, extra: 0x4}, {length: 0x70, extra: 0x4}, {length: 0x80, extra: 0x5}, {length: 0xa0, extra: 0x5}, {length: 0xc0, extra: 0x5}, {length: 0xe0, extra: 0x5}, {length: 0xff, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}} - -var bitMask32 = [32]uint32{ - 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, - 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, - 0x1ffff, 0x3ffff, 0x7FFFF, 0xfFFFF, 0x1fFFFF, 0x3fFFFF, 0x7fFFFF, 0xffFFFF, - 0x1ffFFFF, 0x3ffFFFF, 0x7ffFFFF, 0xfffFFFF, 0x1fffFFFF, 0x3fffFFFF, 0x7fffFFFF, -} // up to 32 bits - -// Initialize the fixedHuffmanDecoder only once upon first use. 
-var ( - fixedOnce sync.Once - fixedHuffmanDecoder huffmanDecoder -) - -// A CorruptInputError reports the presence of corrupt input at a given offset. -type CorruptInputError = flate.CorruptInputError - -// An InternalError reports an error in the flate code itself. -type InternalError string - -func (e InternalError) Error() string { return "flate: internal error: " + string(e) } - -// A ReadError reports an error encountered while reading input. -// -// Deprecated: No longer returned. -type ReadError = flate.ReadError - -// A WriteError reports an error encountered while writing output. -// -// Deprecated: No longer returned. -type WriteError = flate.WriteError - -// Resetter resets a ReadCloser returned by NewReader or NewReaderDict to -// to switch to a new underlying Reader. This permits reusing a ReadCloser -// instead of allocating a new one. -type Resetter interface { - // Reset discards any buffered data and resets the Resetter as if it was - // newly initialized with the given reader. - Reset(r io.Reader, dict []byte) error -} - -// The data structure for decoding Huffman tables is based on that of -// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits), -// For codes smaller than the table width, there are multiple entries -// (each combination of trailing bits has the same value). For codes -// larger than the table width, the table contains a link to an overflow -// table. The width of each entry in the link table is the maximum code -// size minus the chunk width. -// -// Note that you can do a lookup in the table even without all bits -// filled. Since the extra bits are zero, and the DEFLATE Huffman codes -// have the property that shorter codes come before longer ones, the -// bit length estimate in the result is a lower bound on the actual -// number of bits. 
-// -// See the following: -// http://www.gzip.org/algorithm.txt - -// chunk & 15 is number of bits -// chunk >> 4 is value, including table link - -const ( - huffmanChunkBits = 9 - huffmanNumChunks = 1 << huffmanChunkBits - huffmanCountMask = 15 - huffmanValueShift = 4 -) - -type huffmanDecoder struct { - maxRead int // the maximum number of bits we can read and not overread - chunks *[huffmanNumChunks]uint16 // chunks as described above - links [][]uint16 // overflow links - linkMask uint32 // mask the width of the link table -} - -// Initialize Huffman decoding tables from array of code lengths. -// Following this function, h is guaranteed to be initialized into a complete -// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a -// degenerate case where the tree has only a single symbol with length 1. Empty -// trees are permitted. -func (h *huffmanDecoder) init(lengths []int) bool { - // Sanity enables additional runtime tests during Huffman - // table construction. It's intended to be used during - // development to supplement the currently ad-hoc unit tests. - const sanity = false - - if h.chunks == nil { - h.chunks = new([huffmanNumChunks]uint16) - } - - if h.maxRead != 0 { - *h = huffmanDecoder{chunks: h.chunks, links: h.links} - } - - // Count number of codes of each length, - // compute maxRead and max length. - var count [maxCodeLen]int - var min, max int - for _, n := range lengths { - if n == 0 { - continue - } - if min == 0 || n < min { - min = n - } - if n > max { - max = n - } - count[n&maxCodeLenMask]++ - } - - // Empty tree. The decompressor.huffSym function will fail later if the tree - // is used. Technically, an empty tree is only valid for the HDIST tree and - // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree - // is guaranteed to fail since it will attempt to use the tree to decode the - // codes for the HLIT and HDIST trees. 
Similarly, an empty HLIT tree is - // guaranteed to fail later since the compressed data section must be - // composed of at least one symbol (the end-of-block marker). - if max == 0 { - return true - } - - code := 0 - var nextcode [maxCodeLen]int - for i := min; i <= max; i++ { - code <<= 1 - nextcode[i&maxCodeLenMask] = code - code += count[i&maxCodeLenMask] - } - - // Check that the coding is complete (i.e., that we've - // assigned all 2-to-the-max possible bit sequences). - // Exception: To be compatible with zlib, we also need to - // accept degenerate single-code codings. See also - // TestDegenerateHuffmanCoding. - if code != 1<<uint(max) && !(code == 1 && max == 1) { - if debugDecode { - fmt.Println("coding failed, code, max:", code, max, code == 1<<uint(max), code == 1 && max == 1, "(one should be true)") - } - return false - } - - h.maxRead = min - - chunks := h.chunks[:] - for i := range chunks { - chunks[i] = 0 - } - - if max > huffmanChunkBits { - numLinks := 1 << (uint(max) - huffmanChunkBits) - h.linkMask = uint32(numLinks - 1) - - // create link tables - link := nextcode[huffmanChunkBits+1] >> 1 - if cap(h.links) < huffmanNumChunks-link { - h.links = make([][]uint16, huffmanNumChunks-link) - } else { - h.links = h.links[:huffmanNumChunks-link] - } - for j := uint(link); j < huffmanNumChunks; j++ { - reverse := int(bits.Reverse16(uint16(j))) - reverse >>= uint(16 - huffmanChunkBits) - off := j - uint(link) - if sanity && h.chunks[reverse] != 0 { - panic("impossible: overwriting existing chunk") - } - h.chunks[reverse] = uint16(off<<huffmanValueShift | (huffmanChunkBits + 1)) - if cap(h.links[off]) < numLinks { - h.links[off] = make([]uint16, numLinks) - } else { - h.links[off] = h.links[off][:numLinks] - } - } - } else { - h.links = h.links[:0] - } - - for i, n := range lengths { - if n == 0 { - continue - } - code := nextcode[n] - nextcode[n]++ - chunk := uint16(i<<huffmanValueShift | n) - reverse := int(bits.Reverse16(uint16(code))) - reverse >>= 
uint(16 - n) - if n <= huffmanChunkBits { - for off := reverse; off < len(h.chunks); off += 1 << uint(n) { - // We should never need to overwrite - // an existing chunk. Also, 0 is - // never a valid chunk, because the - // lower 4 "count" bits should be - // between 1 and 15. - if sanity && h.chunks[off] != 0 { - panic("impossible: overwriting existing chunk") - } - h.chunks[off] = chunk - } - } else { - j := reverse & (huffmanNumChunks - 1) - if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 { - // Longer codes should have been - // associated with a link table above. - panic("impossible: not an indirect chunk") - } - value := h.chunks[j] >> huffmanValueShift - linktab := h.links[value] - reverse >>= huffmanChunkBits - for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) { - if sanity && linktab[off] != 0 { - panic("impossible: overwriting existing chunk") - } - linktab[off] = chunk - } - } - } - - if sanity { - // Above we've sanity checked that we never overwrote - // an existing entry. Here we additionally check that - // we filled the tables completely. - for i, chunk := range h.chunks { - if chunk == 0 { - // As an exception, in the degenerate - // single-code case, we allow odd - // chunks to be missing. - if code == 1 && i%2 == 1 { - continue - } - panic("impossible: missing chunk") - } - } - for _, linktab := range h.links { - for _, chunk := range linktab { - if chunk == 0 { - panic("impossible: missing chunk") - } - } - } - } - - return true -} - -// Reader is the actual read interface needed by NewReader. -// If the passed in io.Reader does not also have ReadByte, -// the NewReader will introduce its own buffering. 
-type Reader interface { - io.Reader - io.ByteReader -} - -type step uint8 - -const ( - copyData step = iota + 1 - nextBlock - huffmanBytesBuffer - huffmanBytesReader - huffmanBufioReader - huffmanStringsReader - huffmanGenericReader -) - -// flushMode tells decompressor when to return data -type flushMode uint8 - -const ( - syncFlush flushMode = iota // return data after sync flush block - partialFlush // return data after each block -) - -// Decompress state. -type decompressor struct { - // Input source. - r Reader - roffset int64 - - // Huffman decoders for literal/length, distance. - h1, h2 huffmanDecoder - - // Length arrays used to define Huffman codes. - bits *[maxNumLit + maxNumDist]int - codebits *[numCodes]int - - // Output history, buffer. - dict dictDecoder - - // Next step in the decompression, - // and decompression state. - step step - stepState int - err error - toRead []byte - hl, hd *huffmanDecoder - copyLen int - copyDist int - - // Temporary buffer (avoids repeated allocation). - buf [4]byte - - // Input bits, in top of b. - b uint32 - - nb uint - final bool - - flushMode flushMode -} - -func (f *decompressor) nextBlock() { - for f.nb < 1+2 { - if f.err = f.moreBits(); f.err != nil { - return - } - } - f.final = f.b&1 == 1 - f.b >>= 1 - typ := f.b & 3 - f.b >>= 2 - f.nb -= 1 + 2 - switch typ { - case 0: - f.dataBlock() - if debugDecode { - fmt.Println("stored block") - } - case 1: - // compressed, fixed Huffman tables - f.hl = &fixedHuffmanDecoder - f.hd = nil - f.huffmanBlockDecoder() - if debugDecode { - fmt.Println("predefinied huffman block") - } - case 2: - // compressed, dynamic Huffman tables - if f.err = f.readHuffman(); f.err != nil { - break - } - f.hl = &f.h1 - f.hd = &f.h2 - f.huffmanBlockDecoder() - if debugDecode { - fmt.Println("dynamic huffman block") - } - default: - // 3 is reserved. 
- if debugDecode { - fmt.Println("reserved data block encountered") - } - f.err = CorruptInputError(f.roffset) - } -} - -func (f *decompressor) Read(b []byte) (int, error) { - for { - if len(f.toRead) > 0 { - n := copy(b, f.toRead) - f.toRead = f.toRead[n:] - if len(f.toRead) == 0 { - return n, f.err - } - return n, nil - } - if f.err != nil { - return 0, f.err - } - - f.doStep() - - if f.err != nil && len(f.toRead) == 0 { - f.toRead = f.dict.readFlush() // Flush what's left in case of error - } - } -} - -// WriteTo implements the io.WriteTo interface for io.Copy and friends. -func (f *decompressor) WriteTo(w io.Writer) (int64, error) { - total := int64(0) - flushed := false - for { - if len(f.toRead) > 0 { - n, err := w.Write(f.toRead) - total += int64(n) - if err != nil { - f.err = err - return total, err - } - if n != len(f.toRead) { - return total, io.ErrShortWrite - } - f.toRead = f.toRead[:0] - } - if f.err != nil && flushed { - if f.err == io.EOF { - return total, nil - } - return total, f.err - } - if f.err == nil { - f.doStep() - } - if len(f.toRead) == 0 && f.err != nil && !flushed { - f.toRead = f.dict.readFlush() // Flush what's left in case of error - flushed = true - } - } -} - -func (f *decompressor) Close() error { - if f.err == io.EOF { - return nil - } - return f.err -} - -// RFC 1951 section 3.2.7. -// Compression with dynamic Huffman codes - -var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} - -func (f *decompressor) readHuffman() error { - // HLIT[5], HDIST[5], HCLEN[4]. 
- for f.nb < 5+5+4 { - if err := f.moreBits(); err != nil { - return err - } - } - nlit := int(f.b&0x1F) + 257 - if nlit > maxNumLit { - if debugDecode { - fmt.Println("nlit > maxNumLit", nlit) - } - return CorruptInputError(f.roffset) - } - f.b >>= 5 - ndist := int(f.b&0x1F) + 1 - if ndist > maxNumDist { - if debugDecode { - fmt.Println("ndist > maxNumDist", ndist) - } - return CorruptInputError(f.roffset) - } - f.b >>= 5 - nclen := int(f.b&0xF) + 4 - // numCodes is 19, so nclen is always valid. - f.b >>= 4 - f.nb -= 5 + 5 + 4 - - // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order. - for i := range nclen { - for f.nb < 3 { - if err := f.moreBits(); err != nil { - return err - } - } - f.codebits[codeOrder[i]] = int(f.b & 0x7) - f.b >>= 3 - f.nb -= 3 - } - for i := nclen; i < len(codeOrder); i++ { - f.codebits[codeOrder[i]] = 0 - } - if !f.h1.init(f.codebits[0:]) { - if debugDecode { - fmt.Println("init codebits failed") - } - return CorruptInputError(f.roffset) - } - - // HLIT + 257 code lengths, HDIST + 1 code lengths, - // using the code length Huffman code. - for i, n := 0, nlit+ndist; i < n; { - x, err := f.huffSym(&f.h1) - if err != nil { - return err - } - if x < 16 { - // Actual length. - f.bits[i] = x - i++ - continue - } - // Repeat previous length or zero. 
- var rep int - var nb uint - var b int - switch x { - default: - return InternalError("unexpected length code") - case 16: - rep = 3 - nb = 2 - if i == 0 { - if debugDecode { - fmt.Println("i==0") - } - return CorruptInputError(f.roffset) - } - b = f.bits[i-1] - case 17: - rep = 3 - nb = 3 - b = 0 - case 18: - rep = 11 - nb = 7 - b = 0 - } - for f.nb < nb { - if err := f.moreBits(); err != nil { - if debugDecode { - fmt.Println("morebits:", err) - } - return err - } - } - rep += int(f.b & uint32(1<<(nb®SizeMaskUint32)-1)) - f.b >>= nb & regSizeMaskUint32 - f.nb -= nb - if i+rep > n { - if debugDecode { - fmt.Println("i+rep > n", i, rep, n) - } - return CorruptInputError(f.roffset) - } - for j := 0; j < rep; j++ { - f.bits[i] = b - i++ - } - } - - if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) { - if debugDecode { - fmt.Println("init2 failed") - } - return CorruptInputError(f.roffset) - } - - // As an optimization, we can initialize the maxRead bits to read at a time - // for the HLIT tree to the length of the EOB marker since we know that - // every block must terminate with one. This preserves the property that - // we never read any extra bytes after the end of the DEFLATE stream. - if f.h1.maxRead < f.bits[endBlockMarker] { - f.h1.maxRead = f.bits[endBlockMarker] - } - if !f.final { - // If not the final block, the smallest block possible is - // a predefined table, BTYPE=01, with a single EOB marker. - // This will take up 3 + 7 bits. - f.h1.maxRead += 10 - } - - return nil -} - -// Copy a single uncompressed data block from input to output. -func (f *decompressor) dataBlock() { - // Uncompressed. - // Discard current half-byte. - left := (f.nb) & 7 - f.nb -= left - f.b >>= left - - offBytes := f.nb >> 3 - // Unfilled values will be overwritten. 
- f.buf[0] = uint8(f.b) - f.buf[1] = uint8(f.b >> 8) - f.buf[2] = uint8(f.b >> 16) - f.buf[3] = uint8(f.b >> 24) - - f.roffset += int64(offBytes) - f.nb, f.b = 0, 0 - - // Length then ones-complement of length. - nr, err := io.ReadFull(f.r, f.buf[offBytes:4]) - f.roffset += int64(nr) - if err != nil { - f.err = noEOF(err) - return - } - n := uint16(f.buf[0]) | uint16(f.buf[1])<<8 - nn := uint16(f.buf[2]) | uint16(f.buf[3])<<8 - if nn != ^n { - if debugDecode { - ncomp := ^n - fmt.Println("uint16(nn) != uint16(^n)", nn, ncomp) - } - f.err = CorruptInputError(f.roffset) - return - } - - if n == 0 { - if f.flushMode == syncFlush { - f.toRead = f.dict.readFlush() - } - - f.finishBlock() - return - } - - f.copyLen = int(n) - f.copyData() -} - -// copyData copies f.copyLen bytes from the underlying reader into f.hist. -// It pauses for reads when f.hist is full. -func (f *decompressor) copyData() { - buf := f.dict.writeSlice() - if len(buf) > f.copyLen { - buf = buf[:f.copyLen] - } - - cnt, err := io.ReadFull(f.r, buf) - f.roffset += int64(cnt) - f.copyLen -= cnt - f.dict.writeMark(cnt) - if err != nil { - f.err = noEOF(err) - return - } - - if f.dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = f.dict.readFlush() - f.step = copyData - return - } - f.finishBlock() -} - -func (f *decompressor) finishBlock() { - if f.final { - if f.dict.availRead() > 0 { - f.toRead = f.dict.readFlush() - } - - f.err = io.EOF - } else if f.flushMode == partialFlush && f.dict.availRead() > 0 { - f.toRead = f.dict.readFlush() - } - - f.step = nextBlock -} - -func (f *decompressor) doStep() { - switch f.step { - case copyData: - f.copyData() - case nextBlock: - f.nextBlock() - case huffmanBytesBuffer: - f.huffmanBytesBuffer() - case huffmanBytesReader: - f.huffmanBytesReader() - case huffmanBufioReader: - f.huffmanBufioReader() - case huffmanStringsReader: - f.huffmanStringsReader() - case huffmanGenericReader: - f.huffmanGenericReader() - default: - panic("BUG: unexpected step state") - 
} -} - -// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF. -func noEOF(e error) error { - if e == io.EOF { - return io.ErrUnexpectedEOF - } - return e -} - -func (f *decompressor) moreBits() error { - c, err := f.r.ReadByte() - if err != nil { - return noEOF(err) - } - f.roffset++ - f.b |= uint32(c) << (f.nb & regSizeMaskUint32) - f.nb += 8 - return nil -} - -// Read the next Huffman-encoded symbol from f according to h. -func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) { - // Since a huffmanDecoder can be empty or be composed of a degenerate tree - // with single element, huffSym must error on these two edge cases. In both - // cases, the chunks slice will be 0 for the invalid sequence, leading it - // satisfy the n == 0 check below. - n := uint(h.maxRead) - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - nb, b := f.nb, f.b - for { - for nb < n { - c, err := f.r.ReadByte() - if err != nil { - f.b = b - f.nb = nb - return 0, noEOF(err) - } - f.roffset++ - b |= uint32(c) << (nb & regSizeMaskUint32) - nb += 8 - } - chunk := h.chunks[b&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= nb { - if n == 0 { - f.b = b - f.nb = nb - if debugDecode { - fmt.Println("huffsym: n==0") - } - f.err = CorruptInputError(f.roffset) - return 0, f.err - } - f.b = b >> (n & regSizeMaskUint32) - f.nb = nb - n - return int(chunk >> huffmanValueShift), nil - } - } -} - -func makeReader(r io.Reader) Reader { - if rr, ok := r.(Reader); ok { - return rr - } - return bufio.NewReader(r) -} - -func fixedHuffmanDecoderInit() { - fixedOnce.Do(func() { - // These come from the RFC section 3.2.6. 
- var bits [288]int - for i := range 144 { - bits[i] = 8 - } - for i := 144; i < 256; i++ { - bits[i] = 9 - } - for i := 256; i < 280; i++ { - bits[i] = 7 - } - for i := 280; i < 288; i++ { - bits[i] = 8 - } - fixedHuffmanDecoder.init(bits[:]) - }) -} - -func (f *decompressor) Reset(r io.Reader, dict []byte) error { - *f = decompressor{ - r: makeReader(r), - bits: f.bits, - codebits: f.codebits, - h1: f.h1, - h2: f.h2, - dict: f.dict, - step: nextBlock, - } - f.dict.init(maxMatchOffset, dict) - return nil -} - -type ReaderOpt func(*decompressor) - -// WithPartialBlock tells decompressor to return after each block, -// so it can read data written with partial flush -func WithPartialBlock() ReaderOpt { - return func(f *decompressor) { - f.flushMode = partialFlush - } -} - -// WithDict initializes the reader with a preset dictionary -func WithDict(dict []byte) ReaderOpt { - return func(f *decompressor) { - f.dict.init(maxMatchOffset, dict) - } -} - -// NewReaderOpts returns new reader with provided options -func NewReaderOpts(r io.Reader, opts ...ReaderOpt) io.ReadCloser { - fixedHuffmanDecoderInit() - - var f decompressor - f.r = makeReader(r) - f.bits = new([maxNumLit + maxNumDist]int) - f.codebits = new([numCodes]int) - f.step = nextBlock - f.dict.init(maxMatchOffset, nil) - - for _, opt := range opts { - opt(&f) - } - - return &f -} - -// NewReader returns a new ReadCloser that can be used -// to read the uncompressed version of r. -// If r does not also implement io.ByteReader, -// the decompressor may read more data than necessary from r. -// It is the caller's responsibility to call Close on the ReadCloser -// when finished reading. -// -// The ReadCloser returned by NewReader also implements Resetter. -func NewReader(r io.Reader) io.ReadCloser { - return NewReaderOpts(r) -} - -// NewReaderDict is like NewReader but initializes the reader -// with a preset dictionary. 
The returned Reader behaves as if -// the uncompressed data stream started with the given dictionary, -// which has already been read. NewReaderDict is typically used -// to read data compressed by NewWriterDict. -// -// The ReadCloser returned by NewReader also implements Resetter. -func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { - return NewReaderOpts(r, WithDict(dict)) -} diff --git a/internal/compress/flate/inflate_gen.go b/internal/compress/flate/inflate_gen.go deleted file mode 100644 index 2b2f993f..00000000 --- a/internal/compress/flate/inflate_gen.go +++ /dev/null @@ -1,1283 +0,0 @@ -// Code generated by go generate gen_inflate.go. DO NOT EDIT. - -package flate - -import ( - "bufio" - "bytes" - "fmt" - "math/bits" - "strings" -) - -// Decode a single Huffman block from f. -// hl and hd are the Huffman states for the lit/length values -// and the distance values, respectively. If hd == nil, using the -// fixed distance encoding associated with fixed Huffman blocks. -func (f *decompressor) huffmanBytesBuffer() { - const ( - stateInit = iota // Zero value must be stateInit - stateDict - ) - fr := f.r.(*bytes.Buffer) - - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - fnb, fb, dict := f.nb, f.b, &f.dict - - switch f.stepState { - case stateInit: - goto readLiteral - case stateDict: - goto copyHistory - } - -readLiteral: - // Read literal and/or (length, distance) according to RFC section 3.2.3. - { - var v int - { - // Inlined v, err := f.huffSym(f.hl) - // Since a huffmanDecoder can be empty or be composed of a degenerate tree - // with single element, huffSym must error on these two edge cases. In both - // cases, the chunks slice will be 0 for the invalid sequence, leading it - // satisfy the n == 0 check below. 
- n := uint(f.hl.maxRead) - for { - for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - f.err = noEOF(err) - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= fnb { - if n == 0 { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("huffsym: n==0") - } - f.err = CorruptInputError(f.roffset) - return - } - fb = fb >> (n & regSizeMaskUint32) - fnb = fnb - n - v = int(chunk >> huffmanValueShift) - break - } - } - } - - var length int - switch { - case v < 256: - dict.writeByte(byte(v)) - if dict.availWrite() == 0 { - f.toRead = dict.readFlush() - f.step = huffmanBytesBuffer - f.stepState = stateInit - f.b, f.nb = fb, fnb - return - } - goto readLiteral - case v == 256: - f.b, f.nb = fb, fnb - f.finishBlock() - return - // otherwise, reference to older data - case v < 265: - length = v - (257 - 3) - case v < maxNumLit: - val := decCodeToLen[(v - 257)] - length = int(val.length) + 3 - n := uint(val.extra) - for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits n>0:", err) - } - f.err = err - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - length += int(fb & bitMask32[n]) - fb >>= n & regSizeMaskUint32 - fnb -= n - default: - if debugDecode { - fmt.Println(v, ">= maxNumLit") - } - f.err = CorruptInputError(f.roffset) - f.b, f.nb = fb, fnb - return - } - - var dist uint32 - if f.hd == nil { - for fnb < 5 { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits f.nb<5:", err) - } - f.err = err - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - dist = 
uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) - fb >>= 5 - fnb -= 5 - } else { - // Since a huffmanDecoder can be empty or be composed of a degenerate tree - // with single element, huffSym must error on these two edge cases. In both - // cases, the chunks slice will be 0 for the invalid sequence, leading it - // satisfy the n == 0 check below. - n := uint(f.hd.maxRead) - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - for { - for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - f.err = noEOF(err) - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= fnb { - if n == 0 { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("huffsym: n==0") - } - f.err = CorruptInputError(f.roffset) - return - } - fb = fb >> (n & regSizeMaskUint32) - fnb = fnb - n - dist = uint32(chunk >> huffmanValueShift) - break - } - } - } - - switch { - case dist < 4: - dist++ - case dist < maxNumDist: - nb := uint(dist-2) >> 1 - // have 1 bit in bottom of dist, need nb more. 
- extra := (dist & 1) << (nb & regSizeMaskUint32) - for fnb < nb { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits f.nb<nb:", err) - } - f.err = err - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - extra |= fb & bitMask32[nb] - fb >>= nb & regSizeMaskUint32 - fnb -= nb - dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra - // slower: dist = bitMask32[nb+1] + 2 + extra - default: - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("dist too big:", dist, maxNumDist) - } - f.err = CorruptInputError(f.roffset) - return - } - - // No check on length; encoding can be prescient. - if dist > uint32(dict.histSize()) { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("dist > dict.histSize():", dist, dict.histSize()) - } - f.err = CorruptInputError(f.roffset) - return - } - - f.copyLen, f.copyDist = length, int(dist) - goto copyHistory - } - -copyHistory: - // Perform a backwards copy according to RFC section 3.2.3. - { - cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) - if cnt == 0 { - cnt = dict.writeCopy(f.copyDist, f.copyLen) - } - f.copyLen -= cnt - - if dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = dict.readFlush() - f.step = huffmanBytesBuffer // We need to continue this work - f.stepState = stateDict - f.b, f.nb = fb, fnb - return - } - goto readLiteral - } - // Not reached -} - -// Decode a single Huffman block from f. -// hl and hd are the Huffman states for the lit/length values -// and the distance values, respectively. If hd == nil, using the -// fixed distance encoding associated with fixed Huffman blocks. -func (f *decompressor) huffmanBytesReader() { - const ( - stateInit = iota // Zero value must be stateInit - stateDict - ) - fr := f.r.(*bytes.Reader) - - // Optimization. 
Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - fnb, fb, dict := f.nb, f.b, &f.dict - - switch f.stepState { - case stateInit: - goto readLiteral - case stateDict: - goto copyHistory - } - -readLiteral: - // Read literal and/or (length, distance) according to RFC section 3.2.3. - { - var v int - { - // Inlined v, err := f.huffSym(f.hl) - // Since a huffmanDecoder can be empty or be composed of a degenerate tree - // with single element, huffSym must error on these two edge cases. In both - // cases, the chunks slice will be 0 for the invalid sequence, leading it - // satisfy the n == 0 check below. - n := uint(f.hl.maxRead) - for { - for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - f.err = noEOF(err) - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= fnb { - if n == 0 { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("huffsym: n==0") - } - f.err = CorruptInputError(f.roffset) - return - } - fb = fb >> (n & regSizeMaskUint32) - fnb = fnb - n - v = int(chunk >> huffmanValueShift) - break - } - } - } - - var length int - switch { - case v < 256: - dict.writeByte(byte(v)) - if dict.availWrite() == 0 { - f.toRead = dict.readFlush() - f.step = huffmanBytesReader - f.stepState = stateInit - f.b, f.nb = fb, fnb - return - } - goto readLiteral - case v == 256: - f.b, f.nb = fb, fnb - f.finishBlock() - return - // otherwise, reference to older data - case v < 265: - length = v - (257 - 3) - case v < maxNumLit: - val := decCodeToLen[(v - 257)] - length = int(val.length) + 3 - n := uint(val.extra) 
- for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits n>0:", err) - } - f.err = err - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - length += int(fb & bitMask32[n]) - fb >>= n & regSizeMaskUint32 - fnb -= n - default: - if debugDecode { - fmt.Println(v, ">= maxNumLit") - } - f.err = CorruptInputError(f.roffset) - f.b, f.nb = fb, fnb - return - } - - var dist uint32 - if f.hd == nil { - for fnb < 5 { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits f.nb<5:", err) - } - f.err = err - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) - fb >>= 5 - fnb -= 5 - } else { - // Since a huffmanDecoder can be empty or be composed of a degenerate tree - // with single element, huffSym must error on these two edge cases. In both - // cases, the chunks slice will be 0 for the invalid sequence, leading it - // satisfy the n == 0 check below. - n := uint(f.hd.maxRead) - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. 
- for { - for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - f.err = noEOF(err) - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= fnb { - if n == 0 { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("huffsym: n==0") - } - f.err = CorruptInputError(f.roffset) - return - } - fb = fb >> (n & regSizeMaskUint32) - fnb = fnb - n - dist = uint32(chunk >> huffmanValueShift) - break - } - } - } - - switch { - case dist < 4: - dist++ - case dist < maxNumDist: - nb := uint(dist-2) >> 1 - // have 1 bit in bottom of dist, need nb more. - extra := (dist & 1) << (nb & regSizeMaskUint32) - for fnb < nb { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits f.nb<nb:", err) - } - f.err = err - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - extra |= fb & bitMask32[nb] - fb >>= nb & regSizeMaskUint32 - fnb -= nb - dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra - // slower: dist = bitMask32[nb+1] + 2 + extra - default: - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("dist too big:", dist, maxNumDist) - } - f.err = CorruptInputError(f.roffset) - return - } - - // No check on length; encoding can be prescient. - if dist > uint32(dict.histSize()) { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("dist > dict.histSize():", dist, dict.histSize()) - } - f.err = CorruptInputError(f.roffset) - return - } - - f.copyLen, f.copyDist = length, int(dist) - goto copyHistory - } - -copyHistory: - // Perform a backwards copy according to RFC section 3.2.3. 
- { - cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) - if cnt == 0 { - cnt = dict.writeCopy(f.copyDist, f.copyLen) - } - f.copyLen -= cnt - - if dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = dict.readFlush() - f.step = huffmanBytesReader // We need to continue this work - f.stepState = stateDict - f.b, f.nb = fb, fnb - return - } - goto readLiteral - } - // Not reached -} - -// Decode a single Huffman block from f. -// hl and hd are the Huffman states for the lit/length values -// and the distance values, respectively. If hd == nil, using the -// fixed distance encoding associated with fixed Huffman blocks. -func (f *decompressor) huffmanBufioReader() { - const ( - stateInit = iota // Zero value must be stateInit - stateDict - ) - fr := f.r.(*bufio.Reader) - - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - fnb, fb, dict := f.nb, f.b, &f.dict - - switch f.stepState { - case stateInit: - goto readLiteral - case stateDict: - goto copyHistory - } - -readLiteral: - // Read literal and/or (length, distance) according to RFC section 3.2.3. - { - var v int - { - // Inlined v, err := f.huffSym(f.hl) - // Since a huffmanDecoder can be empty or be composed of a degenerate tree - // with single element, huffSym must error on these two edge cases. In both - // cases, the chunks slice will be 0 for the invalid sequence, leading it - // satisfy the n == 0 check below. 
- n := uint(f.hl.maxRead) - for { - for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - f.err = noEOF(err) - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= fnb { - if n == 0 { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("huffsym: n==0") - } - f.err = CorruptInputError(f.roffset) - return - } - fb = fb >> (n & regSizeMaskUint32) - fnb = fnb - n - v = int(chunk >> huffmanValueShift) - break - } - } - } - - var length int - switch { - case v < 256: - dict.writeByte(byte(v)) - if dict.availWrite() == 0 { - f.toRead = dict.readFlush() - f.step = huffmanBufioReader - f.stepState = stateInit - f.b, f.nb = fb, fnb - return - } - goto readLiteral - case v == 256: - f.b, f.nb = fb, fnb - f.finishBlock() - return - // otherwise, reference to older data - case v < 265: - length = v - (257 - 3) - case v < maxNumLit: - val := decCodeToLen[(v - 257)] - length = int(val.length) + 3 - n := uint(val.extra) - for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits n>0:", err) - } - f.err = err - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - length += int(fb & bitMask32[n]) - fb >>= n & regSizeMaskUint32 - fnb -= n - default: - if debugDecode { - fmt.Println(v, ">= maxNumLit") - } - f.err = CorruptInputError(f.roffset) - f.b, f.nb = fb, fnb - return - } - - var dist uint32 - if f.hd == nil { - for fnb < 5 { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits f.nb<5:", err) - } - f.err = err - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - dist = 
uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) - fb >>= 5 - fnb -= 5 - } else { - // Since a huffmanDecoder can be empty or be composed of a degenerate tree - // with single element, huffSym must error on these two edge cases. In both - // cases, the chunks slice will be 0 for the invalid sequence, leading it - // satisfy the n == 0 check below. - n := uint(f.hd.maxRead) - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - for { - for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - f.err = noEOF(err) - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= fnb { - if n == 0 { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("huffsym: n==0") - } - f.err = CorruptInputError(f.roffset) - return - } - fb = fb >> (n & regSizeMaskUint32) - fnb = fnb - n - dist = uint32(chunk >> huffmanValueShift) - break - } - } - } - - switch { - case dist < 4: - dist++ - case dist < maxNumDist: - nb := uint(dist-2) >> 1 - // have 1 bit in bottom of dist, need nb more. 
- extra := (dist & 1) << (nb & regSizeMaskUint32) - for fnb < nb { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits f.nb<nb:", err) - } - f.err = err - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - extra |= fb & bitMask32[nb] - fb >>= nb & regSizeMaskUint32 - fnb -= nb - dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra - // slower: dist = bitMask32[nb+1] + 2 + extra - default: - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("dist too big:", dist, maxNumDist) - } - f.err = CorruptInputError(f.roffset) - return - } - - // No check on length; encoding can be prescient. - if dist > uint32(dict.histSize()) { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("dist > dict.histSize():", dist, dict.histSize()) - } - f.err = CorruptInputError(f.roffset) - return - } - - f.copyLen, f.copyDist = length, int(dist) - goto copyHistory - } - -copyHistory: - // Perform a backwards copy according to RFC section 3.2.3. - { - cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) - if cnt == 0 { - cnt = dict.writeCopy(f.copyDist, f.copyLen) - } - f.copyLen -= cnt - - if dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = dict.readFlush() - f.step = huffmanBufioReader // We need to continue this work - f.stepState = stateDict - f.b, f.nb = fb, fnb - return - } - goto readLiteral - } - // Not reached -} - -// Decode a single Huffman block from f. -// hl and hd are the Huffman states for the lit/length values -// and the distance values, respectively. If hd == nil, using the -// fixed distance encoding associated with fixed Huffman blocks. -func (f *decompressor) huffmanStringsReader() { - const ( - stateInit = iota // Zero value must be stateInit - stateDict - ) - fr := f.r.(*strings.Reader) - - // Optimization. 
Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - fnb, fb, dict := f.nb, f.b, &f.dict - - switch f.stepState { - case stateInit: - goto readLiteral - case stateDict: - goto copyHistory - } - -readLiteral: - // Read literal and/or (length, distance) according to RFC section 3.2.3. - { - var v int - { - // Inlined v, err := f.huffSym(f.hl) - // Since a huffmanDecoder can be empty or be composed of a degenerate tree - // with single element, huffSym must error on these two edge cases. In both - // cases, the chunks slice will be 0 for the invalid sequence, leading it - // satisfy the n == 0 check below. - n := uint(f.hl.maxRead) - for { - for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - f.err = noEOF(err) - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= fnb { - if n == 0 { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("huffsym: n==0") - } - f.err = CorruptInputError(f.roffset) - return - } - fb = fb >> (n & regSizeMaskUint32) - fnb = fnb - n - v = int(chunk >> huffmanValueShift) - break - } - } - } - - var length int - switch { - case v < 256: - dict.writeByte(byte(v)) - if dict.availWrite() == 0 { - f.toRead = dict.readFlush() - f.step = huffmanStringsReader - f.stepState = stateInit - f.b, f.nb = fb, fnb - return - } - goto readLiteral - case v == 256: - f.b, f.nb = fb, fnb - f.finishBlock() - return - // otherwise, reference to older data - case v < 265: - length = v - (257 - 3) - case v < maxNumLit: - val := decCodeToLen[(v - 257)] - length = int(val.length) + 3 - n := 
uint(val.extra) - for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits n>0:", err) - } - f.err = err - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - length += int(fb & bitMask32[n]) - fb >>= n & regSizeMaskUint32 - fnb -= n - default: - if debugDecode { - fmt.Println(v, ">= maxNumLit") - } - f.err = CorruptInputError(f.roffset) - f.b, f.nb = fb, fnb - return - } - - var dist uint32 - if f.hd == nil { - for fnb < 5 { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits f.nb<5:", err) - } - f.err = err - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) - fb >>= 5 - fnb -= 5 - } else { - // Since a huffmanDecoder can be empty or be composed of a degenerate tree - // with single element, huffSym must error on these two edge cases. In both - // cases, the chunks slice will be 0 for the invalid sequence, leading it - // satisfy the n == 0 check below. - n := uint(f.hd.maxRead) - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. 
- for { - for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - f.err = noEOF(err) - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= fnb { - if n == 0 { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("huffsym: n==0") - } - f.err = CorruptInputError(f.roffset) - return - } - fb = fb >> (n & regSizeMaskUint32) - fnb = fnb - n - dist = uint32(chunk >> huffmanValueShift) - break - } - } - } - - switch { - case dist < 4: - dist++ - case dist < maxNumDist: - nb := uint(dist-2) >> 1 - // have 1 bit in bottom of dist, need nb more. - extra := (dist & 1) << (nb & regSizeMaskUint32) - for fnb < nb { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits f.nb<nb:", err) - } - f.err = err - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - extra |= fb & bitMask32[nb] - fb >>= nb & regSizeMaskUint32 - fnb -= nb - dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra - // slower: dist = bitMask32[nb+1] + 2 + extra - default: - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("dist too big:", dist, maxNumDist) - } - f.err = CorruptInputError(f.roffset) - return - } - - // No check on length; encoding can be prescient. - if dist > uint32(dict.histSize()) { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("dist > dict.histSize():", dist, dict.histSize()) - } - f.err = CorruptInputError(f.roffset) - return - } - - f.copyLen, f.copyDist = length, int(dist) - goto copyHistory - } - -copyHistory: - // Perform a backwards copy according to RFC section 3.2.3. 
- { - cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) - if cnt == 0 { - cnt = dict.writeCopy(f.copyDist, f.copyLen) - } - f.copyLen -= cnt - - if dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = dict.readFlush() - f.step = huffmanStringsReader // We need to continue this work - f.stepState = stateDict - f.b, f.nb = fb, fnb - return - } - goto readLiteral - } - // Not reached -} - -// Decode a single Huffman block from f. -// hl and hd are the Huffman states for the lit/length values -// and the distance values, respectively. If hd == nil, using the -// fixed distance encoding associated with fixed Huffman blocks. -func (f *decompressor) huffmanGenericReader() { - const ( - stateInit = iota // Zero value must be stateInit - stateDict - ) - fr := f.r.(Reader) - - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - fnb, fb, dict := f.nb, f.b, &f.dict - - switch f.stepState { - case stateInit: - goto readLiteral - case stateDict: - goto copyHistory - } - -readLiteral: - // Read literal and/or (length, distance) according to RFC section 3.2.3. - { - var v int - { - // Inlined v, err := f.huffSym(f.hl) - // Since a huffmanDecoder can be empty or be composed of a degenerate tree - // with single element, huffSym must error on these two edge cases. In both - // cases, the chunks slice will be 0 for the invalid sequence, leading it - // satisfy the n == 0 check below. 
- n := uint(f.hl.maxRead) - for { - for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - f.err = noEOF(err) - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= fnb { - if n == 0 { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("huffsym: n==0") - } - f.err = CorruptInputError(f.roffset) - return - } - fb = fb >> (n & regSizeMaskUint32) - fnb = fnb - n - v = int(chunk >> huffmanValueShift) - break - } - } - } - - var length int - switch { - case v < 256: - dict.writeByte(byte(v)) - if dict.availWrite() == 0 { - f.toRead = dict.readFlush() - f.step = huffmanGenericReader - f.stepState = stateInit - f.b, f.nb = fb, fnb - return - } - goto readLiteral - case v == 256: - f.b, f.nb = fb, fnb - f.finishBlock() - return - // otherwise, reference to older data - case v < 265: - length = v - (257 - 3) - case v < maxNumLit: - val := decCodeToLen[(v - 257)] - length = int(val.length) + 3 - n := uint(val.extra) - for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits n>0:", err) - } - f.err = err - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - length += int(fb & bitMask32[n]) - fb >>= n & regSizeMaskUint32 - fnb -= n - default: - if debugDecode { - fmt.Println(v, ">= maxNumLit") - } - f.err = CorruptInputError(f.roffset) - f.b, f.nb = fb, fnb - return - } - - var dist uint32 - if f.hd == nil { - for fnb < 5 { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits f.nb<5:", err) - } - f.err = err - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - dist = 
uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) - fb >>= 5 - fnb -= 5 - } else { - // Since a huffmanDecoder can be empty or be composed of a degenerate tree - // with single element, huffSym must error on these two edge cases. In both - // cases, the chunks slice will be 0 for the invalid sequence, leading it - // satisfy the n == 0 check below. - n := uint(f.hd.maxRead) - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - for { - for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - f.err = noEOF(err) - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= fnb { - if n == 0 { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("huffsym: n==0") - } - f.err = CorruptInputError(f.roffset) - return - } - fb = fb >> (n & regSizeMaskUint32) - fnb = fnb - n - dist = uint32(chunk >> huffmanValueShift) - break - } - } - } - - switch { - case dist < 4: - dist++ - case dist < maxNumDist: - nb := uint(dist-2) >> 1 - // have 1 bit in bottom of dist, need nb more. 
- extra := (dist & 1) << (nb & regSizeMaskUint32) - for fnb < nb { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits f.nb<nb:", err) - } - f.err = err - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - extra |= fb & bitMask32[nb] - fb >>= nb & regSizeMaskUint32 - fnb -= nb - dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra - // slower: dist = bitMask32[nb+1] + 2 + extra - default: - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("dist too big:", dist, maxNumDist) - } - f.err = CorruptInputError(f.roffset) - return - } - - // No check on length; encoding can be prescient. - if dist > uint32(dict.histSize()) { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("dist > dict.histSize():", dist, dict.histSize()) - } - f.err = CorruptInputError(f.roffset) - return - } - - f.copyLen, f.copyDist = length, int(dist) - goto copyHistory - } - -copyHistory: - // Perform a backwards copy according to RFC section 3.2.3. 
- { - cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) - if cnt == 0 { - cnt = dict.writeCopy(f.copyDist, f.copyLen) - } - f.copyLen -= cnt - - if dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = dict.readFlush() - f.step = huffmanGenericReader // We need to continue this work - f.stepState = stateDict - f.b, f.nb = fb, fnb - return - } - goto readLiteral - } - // Not reached -} - -func (f *decompressor) huffmanBlockDecoder() { - switch f.r.(type) { - case *bytes.Buffer: - f.huffmanBytesBuffer() - case *bytes.Reader: - f.huffmanBytesReader() - case *bufio.Reader: - f.huffmanBufioReader() - case *strings.Reader: - f.huffmanStringsReader() - case Reader: - f.huffmanGenericReader() - default: - f.huffmanGenericReader() - } -} diff --git a/internal/compress/flate/inflate_test.go b/internal/compress/flate/inflate_test.go deleted file mode 100644 index f163695f..00000000 --- a/internal/compress/flate/inflate_test.go +++ /dev/null @@ -1,301 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package flate - -import ( - "bytes" - "crypto/rand" - "io" - "os" - "strconv" - "strings" - "testing" -) - -func TestReset(t *testing.T) { - ss := []string{ - "lorem ipsum izzle fo rizzle", - "the quick brown fox jumped over", - } - - deflated := make([]bytes.Buffer, 2) - for i, s := range ss { - w, _ := NewWriter(&deflated[i], 1) - w.Write([]byte(s)) - w.Close() - } - - inflated := make([]bytes.Buffer, 2) - - f := NewReader(&deflated[0]) - io.Copy(&inflated[0], f) - f.(Resetter).Reset(&deflated[1], nil) - io.Copy(&inflated[1], f) - f.Close() - - for i, s := range ss { - if s != inflated[i].String() { - t.Errorf("inflated[%d]:\ngot %q\nwant %q", i, inflated[i].String(), s) - } - } -} - -func TestReaderTruncated(t *testing.T) { - vectors := []struct{ input, output string }{ - {"\x00", ""}, - {"\x00\f", ""}, - {"\x00\f\x00", ""}, - {"\x00\f\x00\xf3\xff", ""}, - {"\x00\f\x00\xf3\xffhello", "hello"}, - {"\x00\f\x00\xf3\xffhello, world", "hello, world"}, - {"\x02", ""}, - {"\xf2H\xcd", "He"}, - {"\xf2H͙0a\u0084\t", "Hel\x90\x90\x90\x90\x90"}, - {"\xf2H͙0a\u0084\t\x00", "Hel\x90\x90\x90\x90\x90"}, - } - - for i, v := range vectors { - r := strings.NewReader(v.input) - zr := NewReader(r) - b, err := io.ReadAll(zr) - if err != io.ErrUnexpectedEOF { - t.Errorf("test %d, error mismatch: got %v, want io.ErrUnexpectedEOF", i, err) - } - if string(b) != v.output { - t.Errorf("test %d, output mismatch: got %q, want %q", i, b, v.output) - } - } -} - -func TestResetDict(t *testing.T) { - dict := []byte("the lorem fox") - ss := []string{ - "lorem ipsum izzle fo rizzle", - "the quick brown fox jumped over", - } - - deflated := make([]bytes.Buffer, len(ss)) - for i, s := range ss { - w, _ := NewWriterDict(&deflated[i], DefaultCompression, dict) - w.Write([]byte(s)) - w.Close() - } - - inflated := make([]bytes.Buffer, len(ss)) - - f := NewReader(nil) - for i := range inflated { - f.(Resetter).Reset(&deflated[i], dict) - io.Copy(&inflated[i], f) - } - f.Close() - - for i, s := range 
ss { - if s != inflated[i].String() { - t.Errorf("inflated[%d]:\ngot %q\nwant %q", i, inflated[i].String(), s) - } - } -} - -// Tests ported from zlib/test/infcover.c -type infTest struct { - hex string - id string - n int -} - -var infTests = []infTest{ - {"0 0 0 0 0", "invalid stored block lengths", 1}, - {"3 0", "fixed", 0}, - {"6", "invalid block type", 1}, - {"1 1 0 fe ff 0", "stored", 0}, - {"fc 0 0", "too many length or distance symbols", 1}, - {"4 0 fe ff", "invalid code lengths set", 1}, - {"4 0 24 49 0", "invalid bit length repeat", 1}, - {"4 0 24 e9 ff ff", "invalid bit length repeat", 1}, - {"4 0 24 e9 ff 6d", "invalid code -- missing end-of-block", 1}, - {"4 80 49 92 24 49 92 24 71 ff ff 93 11 0", "invalid literal/lengths set", 1}, - {"4 80 49 92 24 49 92 24 f b4 ff ff c3 84", "invalid distances set", 1}, - {"4 c0 81 8 0 0 0 0 20 7f eb b 0 0", "invalid literal/length code", 1}, - {"2 7e ff ff", "invalid distance code", 1}, - {"c c0 81 0 0 0 0 0 90 ff 6b 4 0", "invalid distance too far back", 1}, - - // also trailer mismatch just in inflate() - {"1f 8b 8 0 0 0 0 0 0 0 3 0 0 0 0 1", "incorrect data check", -1}, - {"1f 8b 8 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 1", "incorrect length check", -1}, - {"5 c0 21 d 0 0 0 80 b0 fe 6d 2f 91 6c", "pull 17", 0}, - {"5 e0 81 91 24 cb b2 2c 49 e2 f 2e 8b 9a 47 56 9f fb fe ec d2 ff 1f", "long code", 0}, - {"ed c0 1 1 0 0 0 40 20 ff 57 1b 42 2c 4f", "length extra", 0}, - {"ed cf c1 b1 2c 47 10 c4 30 fa 6f 35 1d 1 82 59 3d fb be 2e 2a fc f c", "long distance and extra", 0}, - {"ed c0 81 0 0 0 0 80 a0 fd a9 17 a9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6", "window end", 0}, -} - -func TestInflate(t *testing.T) { - for _, test := range infTests { - hex := strings.Split(test.hex, " ") - data := make([]byte, len(hex)) - for i, h := range hex { - b, _ := strconv.ParseInt(h, 16, 32) - data[i] = byte(b) - } - buf := bytes.NewReader(data) - r := NewReader(buf) - - _, err := io.Copy(io.Discard, r) - if (test.n 
== 0 && err == nil) || (test.n != 0 && err != nil) { - t.Logf("%q: OK:", test.id) - t.Logf(" - got %v", err) - continue - } - - if test.n == 0 && err != nil { - t.Errorf("%q: Expected no error, but got %v", test.id, err) - continue - } - - if test.n != 0 && err == nil { - t.Errorf("%q:Expected an error, but got none", test.id) - continue - } - t.Fatal(test.n, err) - } - - for _, test := range infOutTests { - hex := strings.Split(test.hex, " ") - data := make([]byte, len(hex)) - for i, h := range hex { - b, _ := strconv.ParseInt(h, 16, 32) - data[i] = byte(b) - } - buf := bytes.NewReader(data) - r := NewReader(buf) - - _, err := io.Copy(io.Discard, r) - if test.err == (err != nil) { - t.Logf("%q: OK:", test.id) - t.Logf(" - got %v", err) - continue - } - - if test.err == false && err != nil { - t.Errorf("%q: Expected no error, but got %v", test.id, err) - continue - } - - if test.err && err == nil { - t.Errorf("%q: Expected an error, but got none", test.id) - continue - } - t.Fatal(test.err, err) - } -} - -// Tests ported from zlib/test/infcover.c -// Since zlib inflate is push (writer) instead of pull (reader) -// some of the window size tests have been removed, since they -// are irrelevant. 
-type infOutTest struct { - hex string - id string - step int - win int - length int - err bool -} - -var infOutTests = []infOutTest{ - {"2 8 20 80 0 3 0", "inflate_fast TYPE return", 0, -15, 258, false}, - {"63 18 5 40 c 0", "window wrap", 3, -8, 300, false}, - {"e5 e0 81 ad 6d cb b2 2c c9 01 1e 59 63 ae 7d ee fb 4d fd b5 35 41 68 ff 7f 0f 0 0 0", "fast length extra bits", 0, -8, 258, true}, - {"25 fd 81 b5 6d 59 b6 6a 49 ea af 35 6 34 eb 8c b9 f6 b9 1e ef 67 49 50 fe ff ff 3f 0 0", "fast distance extra bits", 0, -8, 258, true}, - {"3 7e 0 0 0 0 0", "fast invalid distance code", 0, -8, 258, true}, - {"1b 7 0 0 0 0 0", "fast invalid literal/length code", 0, -8, 258, true}, - {"d c7 1 ae eb 38 c 4 41 a0 87 72 de df fb 1f b8 36 b1 38 5d ff ff 0", "fast 2nd level codes and too far back", 0, -8, 258, true}, - {"63 18 5 8c 10 8 0 0 0 0", "very common case", 0, -8, 259, false}, - {"63 60 60 18 c9 0 8 18 18 18 26 c0 28 0 29 0 0 0", "contiguous and wrap around window", 6, -8, 259, false}, - {"63 0 3 0 0 0 0 0", "copy direct from output", 0, -8, 259, false}, - {"1f 8b 0 0", "bad gzip method", 0, 31, 0, true}, - {"1f 8b 8 80", "bad gzip flags", 0, 31, 0, true}, - {"77 85", "bad zlib method", 0, 15, 0, true}, - {"78 9c", "bad zlib window size", 0, 8, 0, true}, - {"1f 8b 8 1e 0 0 0 0 0 0 1 0 0 0 0 0 0", "bad header crc", 0, 47, 1, true}, - {"1f 8b 8 2 0 0 0 0 0 0 1d 26 3 0 0 0 0 0 0 0 0 0", "check gzip length", 0, 47, 0, true}, - {"78 90", "bad zlib header check", 0, 47, 0, true}, - {"8 b8 0 0 0 1", "need dictionary", 0, 8, 0, true}, - {"63 18 68 30 d0 0 0", "force split window update", 4, -8, 259, false}, - {"3 0", "use fixed blocks", 0, -15, 1, false}, - {"", "bad window size", 0, 1, 0, true}, -} - -func TestWriteTo(t *testing.T) { - input := make([]byte, 100000) - n, err := rand.Read(input) - if err != nil { - t.Fatal(err) - } - if n != len(input) { - t.Fatal("did not fill buffer") - } - compressed := &bytes.Buffer{} - w, err := NewWriter(compressed, -2) - if err != nil { - 
t.Fatal(err) - } - n, err = w.Write(input) - if err != nil { - t.Fatal(err) - } - if n != len(input) { - t.Fatal("did not fill buffer") - } - w.Close() - buf := compressed.Bytes() - - dec := NewReader(bytes.NewBuffer(buf)) - // ReadAll does not use WriteTo, but we wrap it in a NopCloser to be sure. - readall, err := io.ReadAll(io.NopCloser(dec)) - if err != nil { - t.Fatal(err) - } - if len(readall) != len(input) { - t.Fatal("did not decompress everything") - } - - dec = NewReader(bytes.NewBuffer(buf)) - wtbuf := &bytes.Buffer{} - written, err := dec.(io.WriterTo).WriteTo(wtbuf) - if err != nil { - t.Fatal(err) - } - if written != int64(len(input)) { - t.Error("Returned length did not match, expected", len(input), "got", written) - } - if wtbuf.Len() != len(input) { - t.Error("Actual Length did not match, expected", len(input), "got", wtbuf.Len()) - } - if !bytes.Equal(wtbuf.Bytes(), input) { - t.Fatal("output did not match input") - } -} - -func TestReaderPartialBlock(t *testing.T) { - data, err := os.ReadFile("testdata/partial-block") - if err != nil { - t.Error(err) - } - - r := NewReaderOpts(bytes.NewReader(data), WithPartialBlock()) - rb := make([]byte, 32) - n, err := r.Read(rb) - if err != nil { - t.Fatalf("Read: %v", err) - } - - expected := "hello, world" - actual := string(rb[:n]) - if expected != actual { - t.Fatalf("expected: %v, got: %v", expected, actual) - } -} diff --git a/internal/compress/flate/level1.go b/internal/compress/flate/level1.go deleted file mode 100644 index d7bad88d..00000000 --- a/internal/compress/flate/level1.go +++ /dev/null @@ -1,215 +0,0 @@ -package flate - -import ( - "fmt" - - "lindenii.org/go/furgit/internal/compress/internal/le" -) - -// fastGen maintains the table for matches, -// and the previous byte block for level 2. -// This is the generic implementation. 
-type fastEncL1 struct { - fastGen - table [tableSize]tableEntry -} - -// EncodeL1 uses a similar algorithm to level 1 -func (e *fastEncL1) Encode(dst *tokens, src []byte) { - const ( - inputMargin = 12 - 1 - minNonLiteralBlockSize = 1 + 1 + inputMargin - hashBytes = 5 - ) - if debugDeflate && e.cur < 0 { - panic(fmt.Sprint("e.cur < 0: ", e.cur)) - } - - // Protect against e.cur wraparound. - for e.cur >= bufferReset { - if len(e.hist) == 0 { - for i := range e.table[:] { - e.table[i] = tableEntry{} - } - e.cur = maxMatchOffset - break - } - // Shift down everything in the table that isn't already too far away. - minOff := e.cur + int32(len(e.hist)) - maxMatchOffset - for i := range e.table[:] { - v := e.table[i].offset - if v <= minOff { - v = 0 - } else { - v = v - e.cur + maxMatchOffset - } - e.table[i].offset = v - } - e.cur = maxMatchOffset - } - - s := e.addBlock(src) - - // This check isn't in the Snappy implementation, but there, the caller - // instead of the callee handles this case. - if len(src) < minNonLiteralBlockSize { - // We do not fill the token table. - // This will be picked up by caller. - dst.n = uint16(len(src)) - return - } - - // Override src - src = e.hist - nextEmit := s - - // sLimit is when to stop looking for offset/length copies. The inputMargin - // lets us use a fast path for emitLiteral in the main loop, while we are - // looking for copies. - sLimit := int32(len(src) - inputMargin) - - // nextEmit is where in src the next emitLiteral should start from. 
- cv := load6432(src, s) - - for { - const skipLog = 5 - const doEvery = 2 - - nextS := s - var candidate tableEntry - var t int32 - for { - nextHash := hashLen(cv, tableBits, hashBytes) - candidate = e.table[nextHash] - nextS = s + doEvery + (s-nextEmit)>>skipLog - if nextS > sLimit { - goto emitRemainder - } - - now := load6432(src, nextS) - e.table[nextHash] = tableEntry{offset: s + e.cur} - nextHash = hashLen(now, tableBits, hashBytes) - t = candidate.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == load3232(src, t) { - e.table[nextHash] = tableEntry{offset: nextS + e.cur} - break - } - - // Do one right away... - cv = now - s = nextS - nextS++ - candidate = e.table[nextHash] - now >>= 8 - e.table[nextHash] = tableEntry{offset: s + e.cur} - - t = candidate.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == load3232(src, t) { - e.table[nextHash] = tableEntry{offset: nextS + e.cur} - break - } - cv = now - s = nextS - } - - // A 4-byte match has been found. We'll later see if more than 4 bytes - // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit - // them as literal bytes. - for { - // Invariant: we have a 4-byte match at s, and no need to emit any - // literal bytes prior to s. - - // Extend the 4-byte match as long as possible. - l := e.matchlenLong(int(s+4), int(t+4), src) + 4 - - // Extend backwards - for t > 0 && s > nextEmit && le.Load8(src, t-1) == le.Load8(src, s-1) { - s-- - t-- - l++ - } - if nextEmit < s { - if false { - emitLiteral(dst, src[nextEmit:s]) - } else { - for _, v := range src[nextEmit:s] { - dst.tokens[dst.n] = token(v) - dst.litHist[v]++ - dst.n++ - } - } - } - - // Save the match found - if false { - dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) - } else { - // Inlined... 
- xoffset := uint32(s - t - baseMatchOffset) - xlength := l - oc := offsetCode(xoffset) - xoffset |= oc << 16 - for xlength > 0 { - xl := xlength - if xl > 258 { - if xl > 258+baseMatchLength { - xl = 258 - } else { - xl = 258 - baseMatchLength - } - } - xlength -= xl - xl -= baseMatchLength - dst.extraHist[lengthCodes1[uint8(xl)]]++ - dst.offHist[oc]++ - dst.tokens[dst.n] = token(matchType | uint32(xl)<<lengthShift | xoffset) - dst.n++ - } - } - s += l - nextEmit = s - if nextS >= s { - s = nextS + 1 - } - if s >= sLimit { - // Index first pair after match end. - if int(s+l+8) < len(src) { - cv := load6432(src, s) - e.table[hashLen(cv, tableBits, hashBytes)] = tableEntry{offset: s + e.cur} - } - goto emitRemainder - } - - // We could immediately start working at s now, but to improve - // compression we first update the hash table at s-2 and at s. If - // another emitCopy is not our next move, also calculate nextHash - // at s+1. At least on GOARCH=amd64, these three hash calculations - // are faster as one load64 call (with some shifts) instead of - // three load32 calls. - x := load6432(src, s-2) - o := e.cur + s - 2 - prevHash := hashLen(x, tableBits, hashBytes) - e.table[prevHash] = tableEntry{offset: o} - x >>= 16 - currHash := hashLen(x, tableBits, hashBytes) - candidate = e.table[currHash] - e.table[currHash] = tableEntry{offset: o + 2} - - t = candidate.offset - e.cur - if s-t > maxMatchOffset || uint32(x) != load3232(src, t) { - cv = x >> 8 - s++ - break - } - } - } - -emitRemainder: - if int(nextEmit) < len(src) { - // If nothing was added, don't encode literals. 
- if dst.n == 0 { - return - } - emitLiteral(dst, src[nextEmit:]) - } -} diff --git a/internal/compress/flate/level2.go b/internal/compress/flate/level2.go deleted file mode 100644 index c8d047f2..00000000 --- a/internal/compress/flate/level2.go +++ /dev/null @@ -1,214 +0,0 @@ -package flate - -import "fmt" - -// fastGen maintains the table for matches, -// and the previous byte block for level 2. -// This is the generic implementation. -type fastEncL2 struct { - fastGen - table [bTableSize]tableEntry -} - -// EncodeL2 uses a similar algorithm to level 1, but is capable -// of matching across blocks giving better compression at a small slowdown. -func (e *fastEncL2) Encode(dst *tokens, src []byte) { - const ( - inputMargin = 12 - 1 - minNonLiteralBlockSize = 1 + 1 + inputMargin - hashBytes = 5 - ) - - if debugDeflate && e.cur < 0 { - panic(fmt.Sprint("e.cur < 0: ", e.cur)) - } - - // Protect against e.cur wraparound. - for e.cur >= bufferReset { - if len(e.hist) == 0 { - for i := range e.table[:] { - e.table[i] = tableEntry{} - } - e.cur = maxMatchOffset - break - } - // Shift down everything in the table that isn't already too far away. - minOff := e.cur + int32(len(e.hist)) - maxMatchOffset - for i := range e.table[:] { - v := e.table[i].offset - if v <= minOff { - v = 0 - } else { - v = v - e.cur + maxMatchOffset - } - e.table[i].offset = v - } - e.cur = maxMatchOffset - } - - s := e.addBlock(src) - - // This check isn't in the Snappy implementation, but there, the caller - // instead of the callee handles this case. - if len(src) < minNonLiteralBlockSize { - // We do not fill the token table. - // This will be picked up by caller. - dst.n = uint16(len(src)) - return - } - - // Override src - src = e.hist - nextEmit := s - - // sLimit is when to stop looking for offset/length copies. The inputMargin - // lets us use a fast path for emitLiteral in the main loop, while we are - // looking for copies. 
- sLimit := int32(len(src) - inputMargin) - - // nextEmit is where in src the next emitLiteral should start from. - cv := load6432(src, s) - for { - // When should we start skipping if we haven't found matches in a long while. - const skipLog = 5 - const doEvery = 2 - - nextS := s - var candidate tableEntry - for { - nextHash := hashLen(cv, bTableBits, hashBytes) - s = nextS - nextS = s + doEvery + (s-nextEmit)>>skipLog - if nextS > sLimit { - goto emitRemainder - } - candidate = e.table[nextHash] - now := load6432(src, nextS) - e.table[nextHash] = tableEntry{offset: s + e.cur} - nextHash = hashLen(now, bTableBits, hashBytes) - - offset := s - (candidate.offset - e.cur) - if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) { - e.table[nextHash] = tableEntry{offset: nextS + e.cur} - break - } - - // Do one right away... - cv = now - s = nextS - nextS++ - candidate = e.table[nextHash] - now >>= 8 - e.table[nextHash] = tableEntry{offset: s + e.cur} - - offset = s - (candidate.offset - e.cur) - if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) { - break - } - cv = now - } - - // A 4-byte match has been found. We'll later see if more than 4 bytes - // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit - // them as literal bytes. - - // Call emitCopy, and then see if another emitCopy could be our next - // move. Repeat until we find no match for the input immediately after - // what was consumed by the last emitCopy call. - // - // If we exit this loop normally then we need to call emitLiteral next, - // though we don't yet know how big the literal will be. We handle that - // by proceeding to the next iteration of the main loop. We also can - // exit this loop via goto if we get close to exhausting the input. - for { - // Invariant: we have a 4-byte match at s, and no need to emit any - // literal bytes prior to s. - - // Extend the 4-byte match as long as possible. 
- t := candidate.offset - e.cur - l := e.matchlenLong(int(s+4), int(t+4), src) + 4 - - // Extend backwards - for t > 0 && s > nextEmit && src[t-1] == src[s-1] { - s-- - t-- - l++ - } - if nextEmit < s { - if false { - emitLiteral(dst, src[nextEmit:s]) - } else { - for _, v := range src[nextEmit:s] { - dst.tokens[dst.n] = token(v) - dst.litHist[v]++ - dst.n++ - } - } - } - - dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) - s += l - nextEmit = s - if nextS >= s { - s = nextS + 1 - } - - if s >= sLimit { - // Index first pair after match end. - if int(s+l+8) < len(src) { - cv := load6432(src, s) - e.table[hashLen(cv, bTableBits, hashBytes)] = tableEntry{offset: s + e.cur} - } - goto emitRemainder - } - - // Store every second hash in-between, but offset by 1. - for i := s - l + 2; i < s-5; i += 7 { - x := load6432(src, i) - nextHash := hashLen(x, bTableBits, hashBytes) - e.table[nextHash] = tableEntry{offset: e.cur + i} - // Skip one - x >>= 16 - nextHash = hashLen(x, bTableBits, hashBytes) - e.table[nextHash] = tableEntry{offset: e.cur + i + 2} - // Skip one - x >>= 16 - nextHash = hashLen(x, bTableBits, hashBytes) - e.table[nextHash] = tableEntry{offset: e.cur + i + 4} - } - - // We could immediately start working at s now, but to improve - // compression we first update the hash table at s-2 to s. If - // another emitCopy is not our next move, also calculate nextHash - // at s+1. At least on GOARCH=amd64, these three hash calculations - // are faster as one load64 call (with some shifts) instead of - // three load32 calls. 
- x := load6432(src, s-2) - o := e.cur + s - 2 - prevHash := hashLen(x, bTableBits, hashBytes) - prevHash2 := hashLen(x>>8, bTableBits, hashBytes) - e.table[prevHash] = tableEntry{offset: o} - e.table[prevHash2] = tableEntry{offset: o + 1} - currHash := hashLen(x>>16, bTableBits, hashBytes) - candidate = e.table[currHash] - e.table[currHash] = tableEntry{offset: o + 2} - - offset := s - (candidate.offset - e.cur) - if offset > maxMatchOffset || uint32(x>>16) != load3232(src, candidate.offset-e.cur) { - cv = x >> 24 - s++ - break - } - } - } - -emitRemainder: - if int(nextEmit) < len(src) { - // If nothing was added, don't encode literals. - if dst.n == 0 { - return - } - - emitLiteral(dst, src[nextEmit:]) - } -} diff --git a/internal/compress/flate/level3.go b/internal/compress/flate/level3.go deleted file mode 100644 index 2cef0290..00000000 --- a/internal/compress/flate/level3.go +++ /dev/null @@ -1,242 +0,0 @@ -package flate - -import "fmt" - -// fastEncL3 -type fastEncL3 struct { - fastGen - table [1 << 16]tableEntryPrev -} - -// Encode uses a similar algorithm to level 2, will check up to two candidates. -func (e *fastEncL3) Encode(dst *tokens, src []byte) { - const ( - inputMargin = 12 - 1 - minNonLiteralBlockSize = 1 + 1 + inputMargin - tableBits = 16 - tableSize = 1 << tableBits - hashBytes = 5 - ) - - if debugDeflate && e.cur < 0 { - panic(fmt.Sprint("e.cur < 0: ", e.cur)) - } - - // Protect against e.cur wraparound. - for e.cur >= bufferReset { - if len(e.hist) == 0 { - for i := range e.table[:] { - e.table[i] = tableEntryPrev{} - } - e.cur = maxMatchOffset - break - } - // Shift down everything in the table that isn't already too far away. 
- minOff := e.cur + int32(len(e.hist)) - maxMatchOffset - for i := range e.table[:] { - v := e.table[i] - if v.Cur.offset <= minOff { - v.Cur.offset = 0 - } else { - v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset - } - if v.Prev.offset <= minOff { - v.Prev.offset = 0 - } else { - v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset - } - e.table[i] = v - } - e.cur = maxMatchOffset - } - - s := e.addBlock(src) - - // Skip if too small. - if len(src) < minNonLiteralBlockSize { - // We do not fill the token table. - // This will be picked up by caller. - dst.n = uint16(len(src)) - return - } - - // Override src - src = e.hist - nextEmit := s - - // sLimit is when to stop looking for offset/length copies. The inputMargin - // lets us use a fast path for emitLiteral in the main loop, while we are - // looking for copies. - sLimit := int32(len(src) - inputMargin) - - // nextEmit is where in src the next emitLiteral should start from. - cv := load6432(src, s) - for { - const skipLog = 7 - nextS := s - var candidate tableEntry - for { - nextHash := hashLen(cv, tableBits, hashBytes) - s = nextS - nextS = s + 1 + (s-nextEmit)>>skipLog - if nextS > sLimit { - goto emitRemainder - } - candidates := e.table[nextHash] - now := load6432(src, nextS) - - // Safe offset distance until s + 4... - minOffset := e.cur + s - (maxMatchOffset - 4) - e.table[nextHash] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur}} - - // Check both candidates - candidate = candidates.Cur - if candidate.offset < minOffset { - cv = now - // Previous will also be invalid, we have nothing. - continue - } - - if uint32(cv) == load3232(src, candidate.offset-e.cur) { - if candidates.Prev.offset < minOffset || uint32(cv) != load3232(src, candidates.Prev.offset-e.cur) { - break - } - // Both match and are valid, pick longest. 
- offset := s - (candidate.offset - e.cur) - o2 := s - (candidates.Prev.offset - e.cur) - l1, l2 := matchLen(src[s+4:], src[s-offset+4:]), matchLen(src[s+4:], src[s-o2+4:]) - if l2 > l1 { - candidate = candidates.Prev - } - break - } else { - // We only check if value mismatches. - // Offset will always be invalid in other cases. - candidate = candidates.Prev - if candidate.offset > minOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) { - break - } - } - cv = now - } - - // Call emitCopy, and then see if another emitCopy could be our next - // move. Repeat until we find no match for the input immediately after - // what was consumed by the last emitCopy call. - // - // If we exit this loop normally then we need to call emitLiteral next, - // though we don't yet know how big the literal will be. We handle that - // by proceeding to the next iteration of the main loop. We also can - // exit this loop via goto if we get close to exhausting the input. - for { - // Invariant: we have a 4-byte match at s, and no need to emit any - // literal bytes prior to s. - - // Extend the 4-byte match as long as possible. - // - t := candidate.offset - e.cur - l := e.matchlenLong(int(s+4), int(t+4), src) + 4 - - // Extend backwards - for t > 0 && s > nextEmit && src[t-1] == src[s-1] { - s-- - t-- - l++ - } - if nextEmit < s { - if false { - emitLiteral(dst, src[nextEmit:s]) - } else { - for _, v := range src[nextEmit:s] { - dst.tokens[dst.n] = token(v) - dst.litHist[v]++ - dst.n++ - } - } - } - - dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) - s += l - nextEmit = s - if nextS >= s { - s = nextS + 1 - } - - if s >= sLimit { - t += l - // Index first pair after match end. - if int(t+8) < len(src) && t > 0 { - cv = load6432(src, t) - nextHash := hashLen(cv, tableBits, hashBytes) - e.table[nextHash] = tableEntryPrev{ - Prev: e.table[nextHash].Cur, - Cur: tableEntry{offset: e.cur + t}, - } - } - goto emitRemainder - } - - // Store every 5th hash in-between. 
- for i := s - l + 2; i < s-5; i += 6 { - nextHash := hashLen(load6432(src, i), tableBits, hashBytes) - e.table[nextHash] = tableEntryPrev{ - Prev: e.table[nextHash].Cur, - Cur: tableEntry{offset: e.cur + i}, - } - } - // We could immediately start working at s now, but to improve - // compression we first update the hash table at s-2 to s. - x := load6432(src, s-2) - prevHash := hashLen(x, tableBits, hashBytes) - - e.table[prevHash] = tableEntryPrev{ - Prev: e.table[prevHash].Cur, - Cur: tableEntry{offset: e.cur + s - 2}, - } - x >>= 8 - prevHash = hashLen(x, tableBits, hashBytes) - - e.table[prevHash] = tableEntryPrev{ - Prev: e.table[prevHash].Cur, - Cur: tableEntry{offset: e.cur + s - 1}, - } - x >>= 8 - currHash := hashLen(x, tableBits, hashBytes) - candidates := e.table[currHash] - cv = x - e.table[currHash] = tableEntryPrev{ - Prev: candidates.Cur, - Cur: tableEntry{offset: s + e.cur}, - } - - // Check both candidates - candidate = candidates.Cur - minOffset := e.cur + s - (maxMatchOffset - 4) - - if candidate.offset > minOffset { - if uint32(cv) == load3232(src, candidate.offset-e.cur) { - // Found a match... - continue - } - candidate = candidates.Prev - if candidate.offset > minOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) { - // Match at prev... - continue - } - } - cv = x >> 8 - s++ - break - } - } - -emitRemainder: - if int(nextEmit) < len(src) { - // If nothing was added, don't encode literals. 
- if dst.n == 0 { - return - } - - emitLiteral(dst, src[nextEmit:]) - } -} diff --git a/internal/compress/flate/level4.go b/internal/compress/flate/level4.go deleted file mode 100644 index 88509e19..00000000 --- a/internal/compress/flate/level4.go +++ /dev/null @@ -1,221 +0,0 @@ -package flate - -import "fmt" - -type fastEncL4 struct { - fastGen - table [tableSize]tableEntry - bTable [tableSize]tableEntry -} - -func (e *fastEncL4) Encode(dst *tokens, src []byte) { - const ( - inputMargin = 12 - 1 - minNonLiteralBlockSize = 1 + 1 + inputMargin - hashShortBytes = 4 - ) - if debugDeflate && e.cur < 0 { - panic(fmt.Sprint("e.cur < 0: ", e.cur)) - } - // Protect against e.cur wraparound. - for e.cur >= bufferReset { - if len(e.hist) == 0 { - for i := range e.table[:] { - e.table[i] = tableEntry{} - } - for i := range e.bTable[:] { - e.bTable[i] = tableEntry{} - } - e.cur = maxMatchOffset - break - } - // Shift down everything in the table that isn't already too far away. - minOff := e.cur + int32(len(e.hist)) - maxMatchOffset - for i := range e.table[:] { - v := e.table[i].offset - if v <= minOff { - v = 0 - } else { - v = v - e.cur + maxMatchOffset - } - e.table[i].offset = v - } - for i := range e.bTable[:] { - v := e.bTable[i].offset - if v <= minOff { - v = 0 - } else { - v = v - e.cur + maxMatchOffset - } - e.bTable[i].offset = v - } - e.cur = maxMatchOffset - } - - s := e.addBlock(src) - - // This check isn't in the Snappy implementation, but there, the caller - // instead of the callee handles this case. - if len(src) < minNonLiteralBlockSize { - // We do not fill the token table. - // This will be picked up by caller. - dst.n = uint16(len(src)) - return - } - - // Override src - src = e.hist - nextEmit := s - - // sLimit is when to stop looking for offset/length copies. The inputMargin - // lets us use a fast path for emitLiteral in the main loop, while we are - // looking for copies. 
- sLimit := int32(len(src) - inputMargin) - - // nextEmit is where in src the next emitLiteral should start from. - cv := load6432(src, s) - for { - const skipLog = 6 - const doEvery = 1 - - nextS := s - var t int32 - for { - nextHashS := hashLen(cv, tableBits, hashShortBytes) - nextHashL := hash7(cv, tableBits) - - s = nextS - nextS = s + doEvery + (s-nextEmit)>>skipLog - if nextS > sLimit { - goto emitRemainder - } - // Fetch a short+long candidate - sCandidate := e.table[nextHashS] - lCandidate := e.bTable[nextHashL] - next := load6432(src, nextS) - entry := tableEntry{offset: s + e.cur} - e.table[nextHashS] = entry - e.bTable[nextHashL] = entry - - t = lCandidate.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == load3232(src, t) { - // We got a long match. Use that. - break - } - - t = sCandidate.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == load3232(src, t) { - // Found a 4 match... - lCandidate = e.bTable[hash7(next, tableBits)] - - // If the next long is a candidate, check if we should use that instead... - lOff := lCandidate.offset - e.cur - if nextS-lOff < maxMatchOffset && load3232(src, lOff) == uint32(next) { - l1, l2 := matchLen(src[s+4:], src[t+4:]), matchLen(src[nextS+4:], src[nextS-lOff+4:]) - if l2 > l1 { - s = nextS - t = lCandidate.offset - e.cur - } - } - break - } - cv = next - } - - // A 4-byte match has been found. We'll later see if more than 4 bytes - // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit - // them as literal bytes. - - // Extend the 4-byte match as long as possible. 
- l := e.matchlenLong(int(s+4), int(t+4), src) + 4 - - // Extend backwards - for t > 0 && s > nextEmit && src[t-1] == src[s-1] { - s-- - t-- - l++ - } - if nextEmit < s { - if false { - emitLiteral(dst, src[nextEmit:s]) - } else { - for _, v := range src[nextEmit:s] { - dst.tokens[dst.n] = token(v) - dst.litHist[v]++ - dst.n++ - } - } - } - if debugDeflate { - if t >= s { - panic("s-t") - } - if (s - t) > maxMatchOffset { - panic(fmt.Sprintln("mmo", t)) - } - if l < baseMatchLength { - panic("bml") - } - } - - dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) - s += l - nextEmit = s - if nextS >= s { - s = nextS + 1 - } - - if s >= sLimit { - // Index first pair after match end. - if int(s+8) < len(src) { - cv := load6432(src, s) - e.table[hashLen(cv, tableBits, hashShortBytes)] = tableEntry{offset: s + e.cur} - e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur} - } - goto emitRemainder - } - - // Store every 3rd hash in-between - if true { - i := nextS - if i < s-1 { - cv := load6432(src, i) - t := tableEntry{offset: i + e.cur} - t2 := tableEntry{offset: t.offset + 1} - e.bTable[hash7(cv, tableBits)] = t - e.bTable[hash7(cv>>8, tableBits)] = t2 - e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2 - - i += 3 - for ; i < s-1; i += 3 { - cv := load6432(src, i) - t := tableEntry{offset: i + e.cur} - t2 := tableEntry{offset: t.offset + 1} - e.bTable[hash7(cv, tableBits)] = t - e.bTable[hash7(cv>>8, tableBits)] = t2 - e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2 - } - } - } - - // We could immediately start working at s now, but to improve - // compression we first update the hash table at s-1 and at s. - x := load6432(src, s-1) - o := e.cur + s - 1 - prevHashS := hashLen(x, tableBits, hashShortBytes) - prevHashL := hash7(x, tableBits) - e.table[prevHashS] = tableEntry{offset: o} - e.bTable[prevHashL] = tableEntry{offset: o} - cv = x >> 8 - } - -emitRemainder: - if int(nextEmit) < len(src) { - // If nothing was added, don't encode literals. 
- if dst.n == 0 { - return - } - - emitLiteral(dst, src[nextEmit:]) - } -} diff --git a/internal/compress/flate/level5.go b/internal/compress/flate/level5.go deleted file mode 100644 index a22ad7d1..00000000 --- a/internal/compress/flate/level5.go +++ /dev/null @@ -1,705 +0,0 @@ -package flate - -import "fmt" - -type fastEncL5 struct { - fastGen - table [tableSize]tableEntry - bTable [tableSize]tableEntryPrev -} - -func (e *fastEncL5) Encode(dst *tokens, src []byte) { - const ( - inputMargin = 12 - 1 - minNonLiteralBlockSize = 1 + 1 + inputMargin - hashShortBytes = 4 - ) - if debugDeflate && e.cur < 0 { - panic(fmt.Sprint("e.cur < 0: ", e.cur)) - } - - // Protect against e.cur wraparound. - for e.cur >= bufferReset { - if len(e.hist) == 0 { - for i := range e.table[:] { - e.table[i] = tableEntry{} - } - for i := range e.bTable[:] { - e.bTable[i] = tableEntryPrev{} - } - e.cur = maxMatchOffset - break - } - // Shift down everything in the table that isn't already too far away. - minOff := e.cur + int32(len(e.hist)) - maxMatchOffset - for i := range e.table[:] { - v := e.table[i].offset - if v <= minOff { - v = 0 - } else { - v = v - e.cur + maxMatchOffset - } - e.table[i].offset = v - } - for i := range e.bTable[:] { - v := e.bTable[i] - if v.Cur.offset <= minOff { - v.Cur.offset = 0 - v.Prev.offset = 0 - } else { - v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset - if v.Prev.offset <= minOff { - v.Prev.offset = 0 - } else { - v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset - } - } - e.bTable[i] = v - } - e.cur = maxMatchOffset - } - - s := e.addBlock(src) - - // This check isn't in the Snappy implementation, but there, the caller - // instead of the callee handles this case. - if len(src) < minNonLiteralBlockSize { - // We do not fill the token table. - // This will be picked up by caller. - dst.n = uint16(len(src)) - return - } - - // Override src - src = e.hist - nextEmit := s - - // sLimit is when to stop looking for offset/length copies. 
The inputMargin - // lets us use a fast path for emitLiteral in the main loop, while we are - // looking for copies. - sLimit := int32(len(src) - inputMargin) - - // nextEmit is where in src the next emitLiteral should start from. - cv := load6432(src, s) - for { - const skipLog = 6 - const doEvery = 1 - - nextS := s - var l int32 - var t int32 - for { - nextHashS := hashLen(cv, tableBits, hashShortBytes) - nextHashL := hash7(cv, tableBits) - - s = nextS - nextS = s + doEvery + (s-nextEmit)>>skipLog - if nextS > sLimit { - goto emitRemainder - } - // Fetch a short+long candidate - sCandidate := e.table[nextHashS] - lCandidate := e.bTable[nextHashL] - next := load6432(src, nextS) - entry := tableEntry{offset: s + e.cur} - e.table[nextHashS] = entry - eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = entry, eLong.Cur - - nextHashS = hashLen(next, tableBits, hashShortBytes) - nextHashL = hash7(next, tableBits) - - t = lCandidate.Cur.offset - e.cur - if s-t < maxMatchOffset { - if uint32(cv) == load3232(src, t) { - // Store the next match - e.table[nextHashS] = tableEntry{offset: nextS + e.cur} - eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur - - t2 := lCandidate.Prev.offset - e.cur - if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, t2) { - l = e.matchlen(int(s+4), int(t+4), src) + 4 - ml1 := e.matchlen(int(s+4), int(t2+4), src) + 4 - if ml1 > l { - t = t2 - l = ml1 - break - } - } - break - } - t = lCandidate.Prev.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == load3232(src, t) { - // Store the next match - e.table[nextHashS] = tableEntry{offset: nextS + e.cur} - eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur - break - } - } - - t = sCandidate.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == load3232(src, t) { - // Found a 4 match... 
- l = e.matchlen(int(s+4), int(t+4), src) + 4 - lCandidate = e.bTable[nextHashL] - // Store the next match - - e.table[nextHashS] = tableEntry{offset: nextS + e.cur} - eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur - - // If the next long is a candidate, use that... - t2 := lCandidate.Cur.offset - e.cur - if nextS-t2 < maxMatchOffset { - if load3232(src, t2) == uint32(next) { - ml := e.matchlen(int(nextS+4), int(t2+4), src) + 4 - if ml > l { - t = t2 - s = nextS - l = ml - break - } - } - // If the previous long is a candidate, use that... - t2 = lCandidate.Prev.offset - e.cur - if nextS-t2 < maxMatchOffset && load3232(src, t2) == uint32(next) { - ml := e.matchlen(int(nextS+4), int(t2+4), src) + 4 - if ml > l { - t = t2 - s = nextS - l = ml - break - } - } - } - break - } - cv = next - } - - // A 4-byte match has been found. We'll later see if more than 4 bytes - // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit - // them as literal bytes. - - if l == 0 { - // Extend the 4-byte match as long as possible. - l = e.matchlenLong(int(s+4), int(t+4), src) + 4 - } else if l == maxMatchLength { - l += e.matchlenLong(int(s+l), int(t+l), src) - } - - // Try to locate a better match by checking the end of best match... - if sAt := s + l; l < 30 && sAt < sLimit { - // Allow some bytes at the beginning to mismatch. - // Sweet spot is 2/3 bytes depending on input. - // 3 is only a little better when it is but sometimes a lot worse. - // The skipped bytes are tested in Extend backwards, - // and still picked up as part of the match if they do. 
- const skipBeginning = 2 - eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset - t2 := eLong - e.cur - l + skipBeginning - s2 := s + skipBeginning - off := s2 - t2 - if t2 >= 0 && off < maxMatchOffset && off > 0 { - if l2 := e.matchlenLong(int(s2), int(t2), src); l2 > l { - t = t2 - l = l2 - s = s2 - } - } - } - - // Extend backwards - for t > 0 && s > nextEmit && src[t-1] == src[s-1] { - s-- - t-- - l++ - } - if nextEmit < s { - if false { - emitLiteral(dst, src[nextEmit:s]) - } else { - for _, v := range src[nextEmit:s] { - dst.tokens[dst.n] = token(v) - dst.litHist[v]++ - dst.n++ - } - } - } - if debugDeflate { - if t >= s { - panic(fmt.Sprintln("s-t", s, t)) - } - if (s - t) > maxMatchOffset { - panic(fmt.Sprintln("mmo", s-t)) - } - if l < baseMatchLength { - panic("bml") - } - } - - dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) - s += l - nextEmit = s - if nextS >= s { - s = nextS + 1 - } - - if s >= sLimit { - goto emitRemainder - } - - // Store every 3rd hash in-between. 
- if true { - const hashEvery = 3 - i := s - l + 1 - if i < s-1 { - cv := load6432(src, i) - t := tableEntry{offset: i + e.cur} - e.table[hashLen(cv, tableBits, hashShortBytes)] = t - eLong := &e.bTable[hash7(cv, tableBits)] - eLong.Cur, eLong.Prev = t, eLong.Cur - - // Do an long at i+1 - cv >>= 8 - t = tableEntry{offset: t.offset + 1} - eLong = &e.bTable[hash7(cv, tableBits)] - eLong.Cur, eLong.Prev = t, eLong.Cur - - // We only have enough bits for a short entry at i+2 - cv >>= 8 - t = tableEntry{offset: t.offset + 1} - e.table[hashLen(cv, tableBits, hashShortBytes)] = t - - // Skip one - otherwise we risk hitting 's' - i += 4 - for ; i < s-1; i += hashEvery { - cv := load6432(src, i) - t := tableEntry{offset: i + e.cur} - t2 := tableEntry{offset: t.offset + 1} - eLong := &e.bTable[hash7(cv, tableBits)] - eLong.Cur, eLong.Prev = t, eLong.Cur - e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2 - } - } - } - - // We could immediately start working at s now, but to improve - // compression we first update the hash table at s-1 and at s. - x := load6432(src, s-1) - o := e.cur + s - 1 - prevHashS := hashLen(x, tableBits, hashShortBytes) - prevHashL := hash7(x, tableBits) - e.table[prevHashS] = tableEntry{offset: o} - eLong := &e.bTable[prevHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: o}, eLong.Cur - cv = x >> 8 - } - -emitRemainder: - if int(nextEmit) < len(src) { - // If nothing was added, don't encode literals. - if dst.n == 0 { - return - } - - emitLiteral(dst, src[nextEmit:]) - } -} - -// fastEncL5Window is a level 5 encoder, -// but with a custom window size. 
-type fastEncL5Window struct { - hist []byte - cur int32 - maxOffset int32 - table [tableSize]tableEntry - bTable [tableSize]tableEntryPrev -} - -func (e *fastEncL5Window) Encode(dst *tokens, src []byte) { - const ( - inputMargin = 12 - 1 - minNonLiteralBlockSize = 1 + 1 + inputMargin - hashShortBytes = 4 - ) - maxMatchOffset := e.maxOffset - if debugDeflate && e.cur < 0 { - panic(fmt.Sprint("e.cur < 0: ", e.cur)) - } - - // Protect against e.cur wraparound. - for e.cur >= bufferReset { - if len(e.hist) == 0 { - for i := range e.table[:] { - e.table[i] = tableEntry{} - } - for i := range e.bTable[:] { - e.bTable[i] = tableEntryPrev{} - } - e.cur = maxMatchOffset - break - } - // Shift down everything in the table that isn't already too far away. - minOff := e.cur + int32(len(e.hist)) - maxMatchOffset - for i := range e.table[:] { - v := e.table[i].offset - if v <= minOff { - v = 0 - } else { - v = v - e.cur + maxMatchOffset - } - e.table[i].offset = v - } - for i := range e.bTable[:] { - v := e.bTable[i] - if v.Cur.offset <= minOff { - v.Cur.offset = 0 - v.Prev.offset = 0 - } else { - v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset - if v.Prev.offset <= minOff { - v.Prev.offset = 0 - } else { - v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset - } - } - e.bTable[i] = v - } - e.cur = maxMatchOffset - } - - s := e.addBlock(src) - - // This check isn't in the Snappy implementation, but there, the caller - // instead of the callee handles this case. - if len(src) < minNonLiteralBlockSize { - // We do not fill the token table. - // This will be picked up by caller. - dst.n = uint16(len(src)) - return - } - - // Override src - src = e.hist - nextEmit := s - - // sLimit is when to stop looking for offset/length copies. The inputMargin - // lets us use a fast path for emitLiteral in the main loop, while we are - // looking for copies. - sLimit := int32(len(src) - inputMargin) - - // nextEmit is where in src the next emitLiteral should start from. 
- cv := load6432(src, s) - for { - const skipLog = 6 - const doEvery = 1 - - nextS := s - var l int32 - var t int32 - for { - nextHashS := hashLen(cv, tableBits, hashShortBytes) - nextHashL := hash7(cv, tableBits) - - s = nextS - nextS = s + doEvery + (s-nextEmit)>>skipLog - if nextS > sLimit { - goto emitRemainder - } - // Fetch a short+long candidate - sCandidate := e.table[nextHashS] - lCandidate := e.bTable[nextHashL] - next := load6432(src, nextS) - entry := tableEntry{offset: s + e.cur} - e.table[nextHashS] = entry - eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = entry, eLong.Cur - - nextHashS = hashLen(next, tableBits, hashShortBytes) - nextHashL = hash7(next, tableBits) - - t = lCandidate.Cur.offset - e.cur - if s-t < maxMatchOffset { - if uint32(cv) == load3232(src, t) { - // Store the next match - e.table[nextHashS] = tableEntry{offset: nextS + e.cur} - eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur - - t2 := lCandidate.Prev.offset - e.cur - if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, t2) { - l = e.matchlen(s+4, t+4, src) + 4 - ml1 := e.matchlen(s+4, t2+4, src) + 4 - if ml1 > l { - t = t2 - l = ml1 - break - } - } - break - } - t = lCandidate.Prev.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == load3232(src, t) { - // Store the next match - e.table[nextHashS] = tableEntry{offset: nextS + e.cur} - eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur - break - } - } - - t = sCandidate.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == load3232(src, t) { - // Found a 4 match... - l = e.matchlen(s+4, t+4, src) + 4 - lCandidate = e.bTable[nextHashL] - // Store the next match - - e.table[nextHashS] = tableEntry{offset: nextS + e.cur} - eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur - - // If the next long is a candidate, use that... 
- t2 := lCandidate.Cur.offset - e.cur - if nextS-t2 < maxMatchOffset { - if load3232(src, t2) == uint32(next) { - ml := e.matchlen(nextS+4, t2+4, src) + 4 - if ml > l { - t = t2 - s = nextS - l = ml - break - } - } - // If the previous long is a candidate, use that... - t2 = lCandidate.Prev.offset - e.cur - if nextS-t2 < maxMatchOffset && load3232(src, t2) == uint32(next) { - ml := e.matchlen(nextS+4, t2+4, src) + 4 - if ml > l { - t = t2 - s = nextS - l = ml - break - } - } - } - break - } - cv = next - } - - // A 4-byte match has been found. We'll later see if more than 4 bytes - // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit - // them as literal bytes. - - if l == 0 { - // Extend the 4-byte match as long as possible. - l = e.matchlenLong(s+4, t+4, src) + 4 - } else if l == maxMatchLength { - l += e.matchlenLong(s+l, t+l, src) - } - - // Try to locate a better match by checking the end of best match... - if sAt := s + l; l < 30 && sAt < sLimit { - // Allow some bytes at the beginning to mismatch. - // Sweet spot is 2/3 bytes depending on input. - // 3 is only a little better when it is but sometimes a lot worse. - // The skipped bytes are tested in Extend backwards, - // and still picked up as part of the match if they do. 
- const skipBeginning = 2 - eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset - t2 := eLong - e.cur - l + skipBeginning - s2 := s + skipBeginning - off := s2 - t2 - if t2 >= 0 && off < maxMatchOffset && off > 0 { - if l2 := e.matchlenLong(s2, t2, src); l2 > l { - t = t2 - l = l2 - s = s2 - } - } - } - - // Extend backwards - for t > 0 && s > nextEmit && src[t-1] == src[s-1] { - s-- - t-- - l++ - } - if nextEmit < s { - if false { - emitLiteral(dst, src[nextEmit:s]) - } else { - for _, v := range src[nextEmit:s] { - dst.tokens[dst.n] = token(v) - dst.litHist[v]++ - dst.n++ - } - } - } - if debugDeflate { - if t >= s { - panic(fmt.Sprintln("s-t", s, t)) - } - if (s - t) > maxMatchOffset { - panic(fmt.Sprintln("mmo", s-t)) - } - if l < baseMatchLength { - panic("bml") - } - } - - dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) - s += l - nextEmit = s - if nextS >= s { - s = nextS + 1 - } - - if s >= sLimit { - goto emitRemainder - } - - // Store every 3rd hash in-between. 
- if true { - const hashEvery = 3 - i := s - l + 1 - if i < s-1 { - cv := load6432(src, i) - t := tableEntry{offset: i + e.cur} - e.table[hashLen(cv, tableBits, hashShortBytes)] = t - eLong := &e.bTable[hash7(cv, tableBits)] - eLong.Cur, eLong.Prev = t, eLong.Cur - - // Do an long at i+1 - cv >>= 8 - t = tableEntry{offset: t.offset + 1} - eLong = &e.bTable[hash7(cv, tableBits)] - eLong.Cur, eLong.Prev = t, eLong.Cur - - // We only have enough bits for a short entry at i+2 - cv >>= 8 - t = tableEntry{offset: t.offset + 1} - e.table[hashLen(cv, tableBits, hashShortBytes)] = t - - // Skip one - otherwise we risk hitting 's' - i += 4 - for ; i < s-1; i += hashEvery { - cv := load6432(src, i) - t := tableEntry{offset: i + e.cur} - t2 := tableEntry{offset: t.offset + 1} - eLong := &e.bTable[hash7(cv, tableBits)] - eLong.Cur, eLong.Prev = t, eLong.Cur - e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2 - } - } - } - - // We could immediately start working at s now, but to improve - // compression we first update the hash table at s-1 and at s. - x := load6432(src, s-1) - o := e.cur + s - 1 - prevHashS := hashLen(x, tableBits, hashShortBytes) - prevHashL := hash7(x, tableBits) - e.table[prevHashS] = tableEntry{offset: o} - eLong := &e.bTable[prevHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: o}, eLong.Cur - cv = x >> 8 - } - -emitRemainder: - if int(nextEmit) < len(src) { - // If nothing was added, don't encode literals. - if dst.n == 0 { - return - } - - emitLiteral(dst, src[nextEmit:]) - } -} - -// Reset the encoding table. -func (e *fastEncL5Window) Reset() { - // We keep the same allocs, since we are compressing the same block sizes. - if cap(e.hist) < allocHistory { - e.hist = make([]byte, 0, allocHistory) - } - - // We offset current position so everything will be out of reach. - // If we are above the buffer reset it will be cleared anyway since len(hist) == 0. 
- if e.cur <= int32(bufferReset) { - e.cur += e.maxOffset + int32(len(e.hist)) - } - e.hist = e.hist[:0] -} - -func (e *fastEncL5Window) addBlock(src []byte) int32 { - // check if we have space already - maxMatchOffset := e.maxOffset - - if len(e.hist)+len(src) > cap(e.hist) { - if cap(e.hist) == 0 { - e.hist = make([]byte, 0, allocHistory) - } else { - if cap(e.hist) < int(maxMatchOffset*2) { - panic("unexpected buffer size") - } - // Move down - offset := int32(len(e.hist)) - maxMatchOffset - copy(e.hist[0:maxMatchOffset], e.hist[offset:]) - e.cur += offset - e.hist = e.hist[:maxMatchOffset] - } - } - s := int32(len(e.hist)) - e.hist = append(e.hist, src...) - return s -} - -// matchlen will return the match length between offsets and t in src. -// The maximum length returned is maxMatchLength - 4. -// It is assumed that s > t, that t >=0 and s < len(src). -func (e *fastEncL5Window) matchlen(s, t int32, src []byte) int32 { - if debugDecode { - if t >= s { - panic(fmt.Sprint("t >=s:", t, s)) - } - if int(s) >= len(src) { - panic(fmt.Sprint("s >= len(src):", s, len(src))) - } - if t < 0 { - panic(fmt.Sprint("t < 0:", t)) - } - if s-t > e.maxOffset { - panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")")) - } - } - s1 := min(int(s)+maxMatchLength-4, len(src)) - - // Extend the match to be as long as possible. - return int32(matchLen(src[s:s1], src[t:])) -} - -// matchlenLong will return the match length between offsets and t in src. -// It is assumed that s > t, that t >=0 and s < len(src). -func (e *fastEncL5Window) matchlenLong(s, t int32, src []byte) int32 { - if debugDeflate { - if t >= s { - panic(fmt.Sprint("t >=s:", t, s)) - } - if int(s) >= len(src) { - panic(fmt.Sprint("s >= len(src):", s, len(src))) - } - if t < 0 { - panic(fmt.Sprint("t < 0:", t)) - } - if s-t > e.maxOffset { - panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")")) - } - } - // Extend the match to be as long as possible. 
- return int32(matchLen(src[s:], src[t:])) -} diff --git a/internal/compress/flate/level6.go b/internal/compress/flate/level6.go deleted file mode 100644 index 96f5bb43..00000000 --- a/internal/compress/flate/level6.go +++ /dev/null @@ -1,325 +0,0 @@ -package flate - -import "fmt" - -type fastEncL6 struct { - fastGen - table [tableSize]tableEntry - bTable [tableSize]tableEntryPrev -} - -func (e *fastEncL6) Encode(dst *tokens, src []byte) { - const ( - inputMargin = 12 - 1 - minNonLiteralBlockSize = 1 + 1 + inputMargin - hashShortBytes = 4 - ) - if debugDeflate && e.cur < 0 { - panic(fmt.Sprint("e.cur < 0: ", e.cur)) - } - - // Protect against e.cur wraparound. - for e.cur >= bufferReset { - if len(e.hist) == 0 { - for i := range e.table[:] { - e.table[i] = tableEntry{} - } - for i := range e.bTable[:] { - e.bTable[i] = tableEntryPrev{} - } - e.cur = maxMatchOffset - break - } - // Shift down everything in the table that isn't already too far away. - minOff := e.cur + int32(len(e.hist)) - maxMatchOffset - for i := range e.table[:] { - v := e.table[i].offset - if v <= minOff { - v = 0 - } else { - v = v - e.cur + maxMatchOffset - } - e.table[i].offset = v - } - for i := range e.bTable[:] { - v := e.bTable[i] - if v.Cur.offset <= minOff { - v.Cur.offset = 0 - v.Prev.offset = 0 - } else { - v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset - if v.Prev.offset <= minOff { - v.Prev.offset = 0 - } else { - v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset - } - } - e.bTable[i] = v - } - e.cur = maxMatchOffset - } - - s := e.addBlock(src) - - // This check isn't in the Snappy implementation, but there, the caller - // instead of the callee handles this case. - if len(src) < minNonLiteralBlockSize { - // We do not fill the token table. - // This will be picked up by caller. - dst.n = uint16(len(src)) - return - } - - // Override src - src = e.hist - nextEmit := s - - // sLimit is when to stop looking for offset/length copies. 
The inputMargin - // lets us use a fast path for emitLiteral in the main loop, while we are - // looking for copies. - sLimit := int32(len(src) - inputMargin) - - // nextEmit is where in src the next emitLiteral should start from. - cv := load6432(src, s) - // Repeat MUST be > 1 and within range - repeat := int32(1) - for { - const skipLog = 7 - const doEvery = 1 - - nextS := s - var l int32 - var t int32 - for { - nextHashS := hashLen(cv, tableBits, hashShortBytes) - nextHashL := hash7(cv, tableBits) - s = nextS - nextS = s + doEvery + (s-nextEmit)>>skipLog - if nextS > sLimit { - goto emitRemainder - } - // Fetch a short+long candidate - sCandidate := e.table[nextHashS] - lCandidate := e.bTable[nextHashL] - next := load6432(src, nextS) - entry := tableEntry{offset: s + e.cur} - e.table[nextHashS] = entry - eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = entry, eLong.Cur - - // Calculate hashes of 'next' - nextHashS = hashLen(next, tableBits, hashShortBytes) - nextHashL = hash7(next, tableBits) - - t = lCandidate.Cur.offset - e.cur - if s-t < maxMatchOffset { - if uint32(cv) == load3232(src, t) { - // Long candidate matches at least 4 bytes. - - // Store the next match - e.table[nextHashS] = tableEntry{offset: nextS + e.cur} - eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur - - // Check the previous long candidate as well. - t2 := lCandidate.Prev.offset - e.cur - if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, t2) { - l = e.matchlen(int(s+4), int(t+4), src) + 4 - ml1 := e.matchlen(int(s+4), int(t2+4), src) + 4 - if ml1 > l { - t = t2 - l = ml1 - break - } - } - break - } - // Current value did not match, but check if previous long value does. 
- t = lCandidate.Prev.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == load3232(src, t) { - // Store the next match - e.table[nextHashS] = tableEntry{offset: nextS + e.cur} - eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur - break - } - } - - t = sCandidate.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == load3232(src, t) { - // Found a 4 match... - l = e.matchlen(int(s+4), int(t+4), src) + 4 - - // Look up next long candidate (at nextS) - lCandidate = e.bTable[nextHashL] - - // Store the next match - e.table[nextHashS] = tableEntry{offset: nextS + e.cur} - eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur - - // Check repeat at s + repOff - const repOff = 1 - t2 := s - repeat + repOff - if load3232(src, t2) == uint32(cv>>(8*repOff)) { - ml := e.matchlen(int(s+4+repOff), int(t2+4), src) + 4 - if ml > l { - t = t2 - l = ml - s += repOff - // Not worth checking more. - break - } - } - - // If the next long is a candidate, use that... - t2 = lCandidate.Cur.offset - e.cur - if nextS-t2 < maxMatchOffset { - if load3232(src, t2) == uint32(next) { - ml := e.matchlen(int(nextS+4), int(t2+4), src) + 4 - if ml > l { - t = t2 - s = nextS - l = ml - // This is ok, but check previous as well. - } - } - // If the previous long is a candidate, use that... - t2 = lCandidate.Prev.offset - e.cur - if nextS-t2 < maxMatchOffset && load3232(src, t2) == uint32(next) { - ml := e.matchlen(int(nextS+4), int(t2+4), src) + 4 - if ml > l { - t = t2 - s = nextS - l = ml - break - } - } - } - break - } - cv = next - } - - // A 4-byte match has been found. We'll later see if more than 4 bytes - // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit - // them as literal bytes. - - // Extend the 4-byte match as long as possible. 
- if l == 0 { - l = e.matchlenLong(int(s+4), int(t+4), src) + 4 - } else if l == maxMatchLength { - l += e.matchlenLong(int(s+l), int(t+l), src) - } - - // Try to locate a better match by checking the end-of-match... - if sAt := s + l; sAt < sLimit { - // Allow some bytes at the beginning to mismatch. - // Sweet spot is 2/3 bytes depending on input. - // 3 is only a little better when it is but sometimes a lot worse. - // The skipped bytes are tested in Extend backwards, - // and still picked up as part of the match if they do. - const skipBeginning = 2 - eLong := &e.bTable[hash7(load6432(src, sAt), tableBits)] - // Test current - t2 := eLong.Cur.offset - e.cur - l + skipBeginning - s2 := s + skipBeginning - off := s2 - t2 - if off < maxMatchOffset { - if off > 0 && t2 >= 0 { - if l2 := e.matchlenLong(int(s2), int(t2), src); l2 > l { - t = t2 - l = l2 - s = s2 - } - } - // Test next: - t2 = eLong.Prev.offset - e.cur - l + skipBeginning - off := s2 - t2 - if off > 0 && off < maxMatchOffset && t2 >= 0 { - if l2 := e.matchlenLong(int(s2), int(t2), src); l2 > l { - t = t2 - l = l2 - s = s2 - } - } - } - } - - // Extend backwards - for t > 0 && s > nextEmit && src[t-1] == src[s-1] { - s-- - t-- - l++ - } - if nextEmit < s { - if false { - emitLiteral(dst, src[nextEmit:s]) - } else { - for _, v := range src[nextEmit:s] { - dst.tokens[dst.n] = token(v) - dst.litHist[v]++ - dst.n++ - } - } - } - if false { - if t >= s { - panic(fmt.Sprintln("s-t", s, t)) - } - if (s - t) > maxMatchOffset { - panic(fmt.Sprintln("mmo", s-t)) - } - if l < baseMatchLength { - panic("bml") - } - } - - dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) - repeat = s - t - s += l - nextEmit = s - if nextS >= s { - s = nextS + 1 - } - - if s >= sLimit { - // Index after match end. 
- for i := nextS + 1; i < int32(len(src))-8; i += 2 { - cv := load6432(src, i) - e.table[hashLen(cv, tableBits, hashShortBytes)] = tableEntry{offset: i + e.cur} - eLong := &e.bTable[hash7(cv, tableBits)] - eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur}, eLong.Cur - } - goto emitRemainder - } - - // Store every long hash in-between and every second short. - if true { - for i := nextS + 1; i < s-1; i += 2 { - cv := load6432(src, i) - t := tableEntry{offset: i + e.cur} - t2 := tableEntry{offset: t.offset + 1} - eLong := &e.bTable[hash7(cv, tableBits)] - eLong2 := &e.bTable[hash7(cv>>8, tableBits)] - e.table[hashLen(cv, tableBits, hashShortBytes)] = t - eLong.Cur, eLong.Prev = t, eLong.Cur - eLong2.Cur, eLong2.Prev = t2, eLong2.Cur - } - } - - // We could immediately start working at s now, but to improve - // compression we first update the hash table at s-1 and at s. - cv = load6432(src, s) - } - -emitRemainder: - if int(nextEmit) < len(src) { - // If nothing was added, don't encode literals. - if dst.n == 0 { - return - } - - emitLiteral(dst, src[nextEmit:]) - } -} diff --git a/internal/compress/flate/matchlen_generic.go b/internal/compress/flate/matchlen_generic.go deleted file mode 100644 index 0ccaeb93..00000000 --- a/internal/compress/flate/matchlen_generic.go +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright 2019+ Klaus Post. All rights reserved. -// License information can be found in the LICENSE file. - -package flate - -import ( - "math/bits" - - "lindenii.org/go/furgit/internal/compress/internal/le" -) - -// matchLen returns the maximum common prefix length of a and b. -// a must be the shortest of the two. 
-func matchLen(a, b []byte) (n int) { - left := len(a) - for left >= 8 { - diff := le.Load64(a, n) ^ le.Load64(b, n) - if diff != 0 { - return n + bits.TrailingZeros64(diff)>>3 - } - n += 8 - left -= 8 - } - - a = a[n:] - b = b[n:] - for i := range a { - if a[i] != b[i] { - break - } - n++ - } - return n -} diff --git a/internal/compress/flate/reader_test.go b/internal/compress/flate/reader_test.go deleted file mode 100644 index 6eedfb9b..00000000 --- a/internal/compress/flate/reader_test.go +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package flate - -import ( - "bytes" - "io" - "os" - "runtime" - "strings" - "testing" -) - -func TestNlitOutOfRange(t *testing.T) { - // Trying to decode this bogus flate data, which has a Huffman table - // with nlit=288, should not panic. - io.Copy(io.Discard, NewReader(strings.NewReader( - "\xfc\xfe\x36\xe7\x5e\x1c\xef\xb3\x55\x58\x77\xb6\x56\xb5\x43\xf4"+ - "\x6f\xf2\xd2\xe6\x3d\x99\xa0\x85\x8c\x48\xeb\xf8\xda\x83\x04\x2a"+ - "\x75\xc4\xf8\x0f\x12\x11\xb9\xb4\x4b\x09\xa0\xbe\x8b\x91\x4c"))) -} - -const ( - digits = iota - twain - random -) - -var testfiles = []string{ - // Digits is the digits of the irrational number e. Its decimal representation - // does not repeat, but there are only 10 possible digits, so it should be - // reasonably compressible. - digits: "../testdata/e.txt", - // Twain is Project Gutenberg's edition of Mark Twain's classic English novel. 
- twain: "../testdata/Mark.Twain-Tom.Sawyer.txt", - // Random bytes - random: "../testdata/sharnd.out", -} - -func benchmarkDecode(b *testing.B, testfile, level, n int) { - b.ReportAllocs() - b.StopTimer() - b.SetBytes(int64(n)) - buf0, err := os.ReadFile(testfiles[testfile]) - if err != nil { - b.Fatal(err) - } - if len(buf0) == 0 { - b.Fatalf("test file %q has no data", testfiles[testfile]) - } - compressed := new(bytes.Buffer) - w, err := NewWriter(compressed, level) - if err != nil { - b.Fatal(err) - } - for i := 0; i < n; i += len(buf0) { - if len(buf0) > n-i { - buf0 = buf0[:n-i] - } - io.Copy(w, bytes.NewReader(buf0)) - } - w.Close() - buf1 := compressed.Bytes() - buf0, compressed, w = nil, nil, nil - r := NewReader(bytes.NewReader(buf1)) - res := r.(Resetter) - runtime.GC() - b.StartTimer() - - for i := 0; i < b.N; i++ { - _ = res.Reset(bytes.NewReader(buf1), nil) - _, _ = io.Copy(io.Discard, r) - } -} - -// These short names are so that gofmt doesn't break the BenchmarkXxx function -// bodies below over multiple lines. 
-const ( - constant = ConstantCompression - speed = BestSpeed - default_ = DefaultCompression - compress = BestCompression - oneK = -1024 -) - -func BenchmarkDecodeDigitsSpeed1e4(b *testing.B) { benchmarkDecode(b, digits, speed, 1e4) } -func BenchmarkDecodeDigitsSpeed1e5(b *testing.B) { benchmarkDecode(b, digits, speed, 1e5) } -func BenchmarkDecodeDigitsSpeed1e6(b *testing.B) { benchmarkDecode(b, digits, speed, 1e6) } -func BenchmarkDecodeDigitsDefault1e4(b *testing.B) { benchmarkDecode(b, digits, default_, 1e4) } -func BenchmarkDecodeDigitsDefault1e5(b *testing.B) { benchmarkDecode(b, digits, default_, 1e5) } -func BenchmarkDecodeDigitsDefault1e6(b *testing.B) { benchmarkDecode(b, digits, default_, 1e6) } -func BenchmarkDecodeDigitsCompress1e4(b *testing.B) { benchmarkDecode(b, digits, compress, 1e4) } -func BenchmarkDecodeDigitsCompress1e5(b *testing.B) { benchmarkDecode(b, digits, compress, 1e5) } -func BenchmarkDecodeDigitsCompress1e6(b *testing.B) { benchmarkDecode(b, digits, compress, 1e6) } -func BenchmarkDecodeTwainSpeed1e4(b *testing.B) { benchmarkDecode(b, twain, speed, 1e4) } -func BenchmarkDecodeTwainSpeed1e5(b *testing.B) { benchmarkDecode(b, twain, speed, 1e5) } -func BenchmarkDecodeTwainSpeed1e6(b *testing.B) { benchmarkDecode(b, twain, speed, 1e6) } -func BenchmarkDecodeTwainDefault1e4(b *testing.B) { benchmarkDecode(b, twain, default_, 1e4) } -func BenchmarkDecodeTwainDefault1e5(b *testing.B) { benchmarkDecode(b, twain, default_, 1e5) } -func BenchmarkDecodeTwainDefault1e6(b *testing.B) { benchmarkDecode(b, twain, default_, 1e6) } -func BenchmarkDecodeTwainCompress1e4(b *testing.B) { benchmarkDecode(b, twain, compress, 1e4) } -func BenchmarkDecodeTwainCompress1e5(b *testing.B) { benchmarkDecode(b, twain, compress, 1e5) } -func BenchmarkDecodeTwainCompress1e6(b *testing.B) { benchmarkDecode(b, twain, compress, 1e6) } -func BenchmarkDecodeRandomSpeed1e4(b *testing.B) { benchmarkDecode(b, random, speed, 1e4) } -func BenchmarkDecodeRandomSpeed1e5(b 
*testing.B) { benchmarkDecode(b, random, speed, 1e5) } -func BenchmarkDecodeRandomSpeed1e6(b *testing.B) { benchmarkDecode(b, random, speed, 1e6) } diff --git a/internal/compress/flate/regmask_amd64.go b/internal/compress/flate/regmask_amd64.go deleted file mode 100644 index 6ed28061..00000000 --- a/internal/compress/flate/regmask_amd64.go +++ /dev/null @@ -1,37 +0,0 @@ -package flate - -const ( - // Masks for shifts with register sizes of the shift value. - // This can be used to work around the x86 design of shifting by mod register size. - // It can be used when a variable shift is always smaller than the register size. - - // reg8SizeMaskX - shift value is 8 bits, shifted is X - reg8SizeMask8 = 7 - reg8SizeMask16 = 15 - reg8SizeMask32 = 31 - reg8SizeMask64 = 63 - - // reg16SizeMaskX - shift value is 16 bits, shifted is X - reg16SizeMask8 = reg8SizeMask8 - reg16SizeMask16 = reg8SizeMask16 - reg16SizeMask32 = reg8SizeMask32 - reg16SizeMask64 = reg8SizeMask64 - - // reg32SizeMaskX - shift value is 32 bits, shifted is X - reg32SizeMask8 = reg8SizeMask8 - reg32SizeMask16 = reg8SizeMask16 - reg32SizeMask32 = reg8SizeMask32 - reg32SizeMask64 = reg8SizeMask64 - - // reg64SizeMaskX - shift value is 64 bits, shifted is X - reg64SizeMask8 = reg8SizeMask8 - reg64SizeMask16 = reg8SizeMask16 - reg64SizeMask32 = reg8SizeMask32 - reg64SizeMask64 = reg8SizeMask64 - - // regSizeMaskUintX - shift value is uint, shifted is X - regSizeMaskUint8 = reg8SizeMask8 - regSizeMaskUint16 = reg8SizeMask16 - regSizeMaskUint32 = reg8SizeMask32 - regSizeMaskUint64 = reg8SizeMask64 -) diff --git a/internal/compress/flate/regmask_other.go b/internal/compress/flate/regmask_other.go deleted file mode 100644 index e62caf71..00000000 --- a/internal/compress/flate/regmask_other.go +++ /dev/null @@ -1,39 +0,0 @@ -//go:build !amd64 - -package flate - -const ( - // Masks for shifts with register sizes of the shift value. 
- // This can be used to work around the x86 design of shifting by mod register size. - // It can be used when a variable shift is always smaller than the register size. - - // reg8SizeMaskX - shift value is 8 bits, shifted is X - reg8SizeMask8 = 0xff - reg8SizeMask16 = 0xff - reg8SizeMask32 = 0xff - reg8SizeMask64 = 0xff - - // reg16SizeMaskX - shift value is 16 bits, shifted is X - reg16SizeMask8 = 0xffff - reg16SizeMask16 = 0xffff - reg16SizeMask32 = 0xffff - reg16SizeMask64 = 0xffff - - // reg32SizeMaskX - shift value is 32 bits, shifted is X - reg32SizeMask8 = 0xffffffff - reg32SizeMask16 = 0xffffffff - reg32SizeMask32 = 0xffffffff - reg32SizeMask64 = 0xffffffff - - // reg64SizeMaskX - shift value is 64 bits, shifted is X - reg64SizeMask8 = 0xffffffffffffffff - reg64SizeMask16 = 0xffffffffffffffff - reg64SizeMask32 = 0xffffffffffffffff - reg64SizeMask64 = 0xffffffffffffffff - - // regSizeMaskUintX - shift value is uint, shifted is X - regSizeMaskUint8 = ^uint(0) - regSizeMaskUint16 = ^uint(0) - regSizeMaskUint32 = ^uint(0) - regSizeMaskUint64 = ^uint(0) -) diff --git a/internal/compress/flate/stateless.go b/internal/compress/flate/stateless.go deleted file mode 100644 index 8f86e2e6..00000000 --- a/internal/compress/flate/stateless.go +++ /dev/null @@ -1,325 +0,0 @@ -package flate - -import ( - "io" - "math" - "sync" - - "lindenii.org/go/furgit/internal/compress/internal/le" -) - -const ( - maxStatelessBlock = math.MaxInt16 - // dictionary will be taken from maxStatelessBlock, so limit it. 
- maxStatelessDict = 8 << 10 - - slTableBits = 13 - slTableSize = 1 << slTableBits - slTableShift = 32 - slTableBits -) - -type statelessWriter struct { - dst io.Writer - closed bool -} - -func (s *statelessWriter) Close() error { - if s.closed { - return nil - } - s.closed = true - // Emit EOF block - return StatelessDeflate(s.dst, nil, true, nil) -} - -func (s *statelessWriter) Write(p []byte) (n int, err error) { - err = StatelessDeflate(s.dst, p, false, nil) - if err != nil { - return 0, err - } - return len(p), nil -} - -func (s *statelessWriter) Reset(w io.Writer) { - s.dst = w - s.closed = false -} - -// NewStatelessWriter will do compression but without maintaining any state -// between Write calls. -// There will be no memory kept between Write calls, -// but compression and speed will be suboptimal. -// Because of this, the size of actual Write calls will affect output size. -func NewStatelessWriter(dst io.Writer) io.WriteCloser { - return &statelessWriter{dst: dst} -} - -// bitWriterPool contains bit writers that can be reused. -var bitWriterPool = sync.Pool{ - New: func() any { - return newHuffmanBitWriter(nil) - }, -} - -// tokensPool contains tokens struct objects that can be reused -var tokensPool = sync.Pool{ - New: func() any { - return &tokens{} - }, -} - -// StatelessDeflate allows compressing directly to a Writer without retaining state. -// When returning everything will be flushed. -// Up to 8KB of an optional dictionary can be given which is presumed to precede the block. -// Longer dictionaries will be truncated and will still produce valid output. -// Sending nil dictionary is perfectly fine. -func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error { - bw := bitWriterPool.Get().(*huffmanBitWriter) - bw.reset(out) - defer func() { - // don't keep a reference to our output - bw.reset(nil) - bitWriterPool.Put(bw) - }() - if eof && len(in) == 0 { - // Just write an EOF block. - // Could be faster... 
- bw.writeStoredHeader(0, true) - bw.flush() - return bw.err - } - - // Truncate dict - if len(dict) > maxStatelessDict { - dict = dict[len(dict)-maxStatelessDict:] - } - - // For subsequent loops, keep shallow dict reference to avoid alloc+copy. - var inDict []byte - - dst := tokensPool.Get().(*tokens) - dst.Reset() - defer func() { - tokensPool.Put(dst) - }() - - for len(in) > 0 { - todo := in - if len(inDict) > 0 { - if len(todo) > maxStatelessBlock-maxStatelessDict { - todo = todo[:maxStatelessBlock-maxStatelessDict] - } - } else if len(todo) > maxStatelessBlock-len(dict) { - todo = todo[:maxStatelessBlock-len(dict)] - } - inOrg := in - in = in[len(todo):] - uncompressed := todo - if len(dict) > 0 { - // combine dict and source - bufLen := len(todo) + len(dict) - combined := make([]byte, bufLen) - copy(combined, dict) - copy(combined[len(dict):], todo) - todo = combined - } - // Compress - if len(inDict) == 0 { - statelessEnc(dst, todo, int16(len(dict))) - } else { - statelessEnc(dst, inDict[:maxStatelessDict+len(todo)], maxStatelessDict) - } - isEof := eof && len(in) == 0 - - if dst.n == 0 { - bw.writeStoredHeader(len(uncompressed), isEof) - if bw.err != nil { - return bw.err - } - bw.writeBytes(uncompressed) - } else if int(dst.n) > len(uncompressed)-len(uncompressed)>>4 { - // If we removed less than 1/16th, huffman compress the block. - bw.writeBlockHuff(isEof, uncompressed, len(in) == 0) - } else { - bw.writeBlockDynamic(dst, isEof, uncompressed, len(in) == 0) - } - if len(in) > 0 { - // Retain a dict if we have more - inDict = inOrg[len(uncompressed)-maxStatelessDict:] - dict = nil - dst.Reset() - } - if bw.err != nil { - return bw.err - } - } - if !eof { - // Align, only a stored block can do that. 
- bw.writeStoredHeader(0, false) - } - bw.flush() - return bw.err -} - -func hashSL(u uint32) uint32 { - return (u * 0x1e35a7bd) >> slTableShift -} - -func load3216(b []byte, i int16) uint32 { - return le.Load32(b, i) -} - -func load6416(b []byte, i int16) uint64 { - return le.Load64(b, i) -} - -func statelessEnc(dst *tokens, src []byte, startAt int16) { - const ( - inputMargin = 12 - 1 - minNonLiteralBlockSize = 1 + 1 + inputMargin - ) - - type tableEntry struct { - offset int16 - } - - var table [slTableSize]tableEntry - - // This check isn't in the Snappy implementation, but there, the caller - // instead of the callee handles this case. - if len(src)-int(startAt) < minNonLiteralBlockSize { - // We do not fill the token table. - // This will be picked up by caller. - dst.n = 0 - return - } - // Index until startAt - if startAt > 0 { - cv := load3232(src, 0) - for i := range startAt { - table[hashSL(cv)] = tableEntry{offset: i} - cv = (cv >> 8) | (uint32(src[i+4]) << 24) - } - } - - s := startAt + 1 - nextEmit := startAt - // sLimit is when to stop looking for offset/length copies. The inputMargin - // lets us use a fast path for emitLiteral in the main loop, while we are - // looking for copies. - sLimit := int16(len(src) - inputMargin) - - // nextEmit is where in src the next emitLiteral should start from. - cv := load3216(src, s) - - for { - const skipLog = 5 - const doEvery = 2 - - nextS := s - var candidate tableEntry - for { - nextHash := hashSL(cv) - candidate = table[nextHash] - nextS = s + doEvery + (s-nextEmit)>>skipLog - if nextS > sLimit || nextS <= 0 { - goto emitRemainder - } - - now := load6416(src, nextS) - table[nextHash] = tableEntry{offset: s} - nextHash = hashSL(uint32(now)) - - if cv == load3216(src, candidate.offset) { - table[nextHash] = tableEntry{offset: nextS} - break - } - - // Do one right away... 
- cv = uint32(now) - s = nextS - nextS++ - candidate = table[nextHash] - now >>= 8 - table[nextHash] = tableEntry{offset: s} - - if cv == load3216(src, candidate.offset) { - table[nextHash] = tableEntry{offset: nextS} - break - } - cv = uint32(now) - s = nextS - } - - // A 4-byte match has been found. We'll later see if more than 4 bytes - // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit - // them as literal bytes. - for { - // Invariant: we have a 4-byte match at s, and no need to emit any - // literal bytes prior to s. - - // Extend the 4-byte match as long as possible. - t := candidate.offset - l := int16(matchLen(src[s+4:], src[t+4:]) + 4) - - // Extend backwards - for t > 0 && s > nextEmit && src[t-1] == src[s-1] { - s-- - t-- - l++ - } - if nextEmit < s { - if false { - emitLiteral(dst, src[nextEmit:s]) - } else { - for _, v := range src[nextEmit:s] { - dst.tokens[dst.n] = token(v) - dst.litHist[v]++ - dst.n++ - } - } - } - - // Save the match found - dst.AddMatchLong(int32(l), uint32(s-t-baseMatchOffset)) - s += l - nextEmit = s - if nextS >= s { - s = nextS + 1 - } - if s >= sLimit { - goto emitRemainder - } - - // We could immediately start working at s now, but to improve - // compression we first update the hash table at s-2 and at s. If - // another emitCopy is not our next move, also calculate nextHash - // at s+1. At least on GOARCH=amd64, these three hash calculations - // are faster as one load64 call (with some shifts) instead of - // three load32 calls. - x := load6416(src, s-2) - o := s - 2 - prevHash := hashSL(uint32(x)) - table[prevHash] = tableEntry{offset: o} - x >>= 16 - currHash := hashSL(uint32(x)) - candidate = table[currHash] - table[currHash] = tableEntry{offset: o + 2} - - if uint32(x) != load3216(src, candidate.offset) { - cv = uint32(x >> 8) - s++ - break - } - } - } - -emitRemainder: - if int(nextEmit) < len(src) { - // If nothing was added, don't encode literals. 
- if dst.n == 0 { - return - } - emitLiteral(dst, src[nextEmit:]) - } -} diff --git a/internal/compress/flate/testdata/fuzz/FuzzEncoding.zip b/internal/compress/flate/testdata/fuzz/FuzzEncoding.zip Binary files differdeleted file mode 100644 index feae35f1..00000000 --- a/internal/compress/flate/testdata/fuzz/FuzzEncoding.zip +++ /dev/null diff --git a/internal/compress/flate/testdata/fuzz/encode-raw-corpus.zip b/internal/compress/flate/testdata/fuzz/encode-raw-corpus.zip Binary files differdeleted file mode 100644 index 7b33f54f..00000000 --- a/internal/compress/flate/testdata/fuzz/encode-raw-corpus.zip +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-null-max.dyn.expect b/internal/compress/flate/testdata/huffman-null-max.dyn.expect Binary files differdeleted file mode 100644 index c0816514..00000000 --- a/internal/compress/flate/testdata/huffman-null-max.dyn.expect +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-null-max.dyn.expect-noinput b/internal/compress/flate/testdata/huffman-null-max.dyn.expect-noinput Binary files differdeleted file mode 100644 index c0816514..00000000 --- a/internal/compress/flate/testdata/huffman-null-max.dyn.expect-noinput +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-null-max.golden b/internal/compress/flate/testdata/huffman-null-max.golden Binary files differdeleted file mode 100644 index db422ca3..00000000 --- a/internal/compress/flate/testdata/huffman-null-max.golden +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-null-max.in b/internal/compress/flate/testdata/huffman-null-max.in Binary files differdeleted file mode 100644 index 5dfddf07..00000000 --- a/internal/compress/flate/testdata/huffman-null-max.in +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-null-max.sync.expect b/internal/compress/flate/testdata/huffman-null-max.sync.expect Binary files differdeleted file mode 100644 index c0816514..00000000 --- 
a/internal/compress/flate/testdata/huffman-null-max.sync.expect +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-null-max.sync.expect-noinput b/internal/compress/flate/testdata/huffman-null-max.sync.expect-noinput Binary files differdeleted file mode 100644 index c0816514..00000000 --- a/internal/compress/flate/testdata/huffman-null-max.sync.expect-noinput +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-null-max.wb.expect b/internal/compress/flate/testdata/huffman-null-max.wb.expect Binary files differdeleted file mode 100644 index c0816514..00000000 --- a/internal/compress/flate/testdata/huffman-null-max.wb.expect +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-null-max.wb.expect-noinput b/internal/compress/flate/testdata/huffman-null-max.wb.expect-noinput Binary files differdeleted file mode 100644 index c0816514..00000000 --- a/internal/compress/flate/testdata/huffman-null-max.wb.expect-noinput +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-pi.dyn.expect b/internal/compress/flate/testdata/huffman-pi.dyn.expect Binary files differdeleted file mode 100644 index e4396ac6..00000000 --- a/internal/compress/flate/testdata/huffman-pi.dyn.expect +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-pi.dyn.expect-noinput b/internal/compress/flate/testdata/huffman-pi.dyn.expect-noinput Binary files differdeleted file mode 100644 index e4396ac6..00000000 --- a/internal/compress/flate/testdata/huffman-pi.dyn.expect-noinput +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-pi.golden b/internal/compress/flate/testdata/huffman-pi.golden Binary files differdeleted file mode 100644 index 23d8f7f9..00000000 --- a/internal/compress/flate/testdata/huffman-pi.golden +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-pi.in b/internal/compress/flate/testdata/huffman-pi.in deleted file mode 100644 index efaed434..00000000 --- 
a/internal/compress/flate/testdata/huffman-pi.in +++ /dev/null @@ -1 +0,0 @@ -3.141592653589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067982148086513282306647093844609550582231725359408128481117450284102701938521105559644622948954930381964428810975665933446128475648233786783165271201909145648566923460348610454326648213393607260249141273724587006606315588174881520920962829254091715364367892590360011330530548820466521384146951941511609433057270365759591953092186117381932611793105118548074462379962749567351885752724891227938183011949129833673362440656643086021394946395224737190702179860943702770539217176293176752384674818467669405132000568127145263560827785771342757789609173637178721468440901224953430146549585371050792279689258923542019956112129021960864034418159813629774771309960518707211349999998372978049951059731732816096318595024459455346908302642522308253344685035261931188171010003137838752886587533208381420617177669147303598253490428755468731159562863882353787593751957781857780532171226806613001927876611195909216420198938095257201065485863278865936153381827968230301952035301852968995773622599413891249721775283479131515574857242454150695950829533116861727855889075098381754637464939319255060400927701671139009848824012858361603563707660104710181942955596198946767837449448255379774726847104047534646208046684259069491293313677028989152104752162056966024058038150193511253382430035587640247496473263914199272604269922796782354781636009341721641219924586315030286182974555706749838505494588586926995690927210797509302955321165344987202755960236480665499119881834797753566369807426542527862551818417574672890977772793800081647060016145249192173217214772350141441973568548161361157352552133475741849468438523323907394143334547762416862518983569485562099219222184272550254256887671790494601653466804988627232791786085784383827967976681454100953883786360950680064225125205117392984896084128488626945604241965285022210661186306744278622039194945
047123713786960956364371917287467764657573962413890865832645995813390478027590099465764078951269468398352595709825822620522489407726719478268482601476990902640136394437455305068203496252451749399651431429809190659250937221696461515709858387410597885959772975498930161753928468138268683868942774155991855925245953959431049972524680845987273644695848653836736222626099124608051243884390451244136549762780797715691435997700129616089441694868555848406353422072225828488648158456028506016842739452267467678895252138522549954666727823986456596116354886230577456498035593634568174324112515076069479451096596094025228879710893145669136867228748940560101503308617928680920874760917824938589009714909675985261365549781893129784821682998948722658804857564014270477555132379641451523746234364542858444795265867821051141354735739523113427166102135969536231442952484937187110145765403590279934403742007310578539062198387447808478489683321445713868751943506430218453191048481005370614680674919278191197939952061419663428754440643745123718192179998391015919561814675142691239748940907186494231961567945208095146550225231603881930142093762137855956638937787083039069792077346722182562599661501421503068038447734549202605414665925201497442850732518666002132434088190710486331734649651453905796268561005508106658796998163574736384052571459102897064140110971206280439039759515677157700420337869936007230558763176359421873125147120532928191826186125867321579198414848829164470609575270695722091756711672291098169091528017350671274858322287183520935396572512108357915136988209144421006751033467110314126711136990865851639831501970165151168517143765761835155650884909989859982387345528331635507647918535893226185489632132933089857064204675259070915481416549859461637180
\ No newline at end of file diff --git a/internal/compress/flate/testdata/huffman-pi.sync.expect b/internal/compress/flate/testdata/huffman-pi.sync.expect Binary files differdeleted file mode 100644 index e4396ac6..00000000 --- a/internal/compress/flate/testdata/huffman-pi.sync.expect +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-pi.sync.expect-noinput b/internal/compress/flate/testdata/huffman-pi.sync.expect-noinput Binary files differdeleted file mode 100644 index e4396ac6..00000000 --- a/internal/compress/flate/testdata/huffman-pi.sync.expect-noinput +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-pi.wb.expect b/internal/compress/flate/testdata/huffman-pi.wb.expect Binary files differdeleted file mode 100644 index e4396ac6..00000000 --- a/internal/compress/flate/testdata/huffman-pi.wb.expect +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-pi.wb.expect-noinput b/internal/compress/flate/testdata/huffman-pi.wb.expect-noinput Binary files differdeleted file mode 100644 index e4396ac6..00000000 --- a/internal/compress/flate/testdata/huffman-pi.wb.expect-noinput +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-rand-1k.dyn.expect b/internal/compress/flate/testdata/huffman-rand-1k.dyn.expect Binary files differdeleted file mode 100644 index 09dc798e..00000000 --- a/internal/compress/flate/testdata/huffman-rand-1k.dyn.expect +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-rand-1k.dyn.expect-noinput b/internal/compress/flate/testdata/huffman-rand-1k.dyn.expect-noinput Binary files differdeleted file mode 100644 index 0c24742f..00000000 --- a/internal/compress/flate/testdata/huffman-rand-1k.dyn.expect-noinput +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-rand-1k.golden b/internal/compress/flate/testdata/huffman-rand-1k.golden Binary files differdeleted file mode 100644 index 09dc798e..00000000 --- 
a/internal/compress/flate/testdata/huffman-rand-1k.golden +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-rand-1k.in b/internal/compress/flate/testdata/huffman-rand-1k.in Binary files differdeleted file mode 100644 index ce038ebb..00000000 --- a/internal/compress/flate/testdata/huffman-rand-1k.in +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-rand-1k.sync.expect b/internal/compress/flate/testdata/huffman-rand-1k.sync.expect Binary files differdeleted file mode 100644 index 09dc798e..00000000 --- a/internal/compress/flate/testdata/huffman-rand-1k.sync.expect +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-rand-1k.sync.expect-noinput b/internal/compress/flate/testdata/huffman-rand-1k.sync.expect-noinput Binary files differdeleted file mode 100644 index 0c24742f..00000000 --- a/internal/compress/flate/testdata/huffman-rand-1k.sync.expect-noinput +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-rand-1k.wb.expect b/internal/compress/flate/testdata/huffman-rand-1k.wb.expect Binary files differdeleted file mode 100644 index 09dc798e..00000000 --- a/internal/compress/flate/testdata/huffman-rand-1k.wb.expect +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-rand-1k.wb.expect-noinput b/internal/compress/flate/testdata/huffman-rand-1k.wb.expect-noinput Binary files differdeleted file mode 100644 index 0c24742f..00000000 --- a/internal/compress/flate/testdata/huffman-rand-1k.wb.expect-noinput +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-rand-limit.dyn.expect b/internal/compress/flate/testdata/huffman-rand-limit.dyn.expect Binary files differdeleted file mode 100644 index 881e59c9..00000000 --- a/internal/compress/flate/testdata/huffman-rand-limit.dyn.expect +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-rand-limit.dyn.expect-noinput b/internal/compress/flate/testdata/huffman-rand-limit.dyn.expect-noinput Binary files differdeleted file 
mode 100644 index 881e59c9..00000000 --- a/internal/compress/flate/testdata/huffman-rand-limit.dyn.expect-noinput +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-rand-limit.golden b/internal/compress/flate/testdata/huffman-rand-limit.golden Binary files differdeleted file mode 100644 index 9ca0eb1c..00000000 --- a/internal/compress/flate/testdata/huffman-rand-limit.golden +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-rand-limit.in b/internal/compress/flate/testdata/huffman-rand-limit.in deleted file mode 100644 index fb5b1be6..00000000 --- a/internal/compress/flate/testdata/huffman-rand-limit.in +++ /dev/null @@ -1,4 +0,0 @@ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -vH -% ɷ}>lsmIGH1Y4[ 0[|]o# --#ulpfٱnYԀYwC8ɯ02 F=gnrN!O{k*w(b kQC9/lu>5C.u diff --git a/internal/compress/flate/testdata/huffman-rand-limit.sync.expect b/internal/compress/flate/testdata/huffman-rand-limit.sync.expect Binary files differdeleted file mode 100644 index 881e59c9..00000000 --- a/internal/compress/flate/testdata/huffman-rand-limit.sync.expect +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-rand-limit.sync.expect-noinput b/internal/compress/flate/testdata/huffman-rand-limit.sync.expect-noinput Binary files differdeleted file mode 100644 index 881e59c9..00000000 --- a/internal/compress/flate/testdata/huffman-rand-limit.sync.expect-noinput +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-rand-limit.wb.expect b/internal/compress/flate/testdata/huffman-rand-limit.wb.expect Binary files differdeleted file mode 100644 index 881e59c9..00000000 --- a/internal/compress/flate/testdata/huffman-rand-limit.wb.expect +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-rand-limit.wb.expect-noinput b/internal/compress/flate/testdata/huffman-rand-limit.wb.expect-noinput Binary files differdeleted file mode 100644 index 881e59c9..00000000 --- 
a/internal/compress/flate/testdata/huffman-rand-limit.wb.expect-noinput +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-rand-max.golden b/internal/compress/flate/testdata/huffman-rand-max.golden Binary files differdeleted file mode 100644 index 47d53c89..00000000 --- a/internal/compress/flate/testdata/huffman-rand-max.golden +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-rand-max.in b/internal/compress/flate/testdata/huffman-rand-max.in Binary files differdeleted file mode 100644 index 8418633d..00000000 --- a/internal/compress/flate/testdata/huffman-rand-max.in +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-shifts.dyn.expect b/internal/compress/flate/testdata/huffman-shifts.dyn.expect Binary files differdeleted file mode 100644 index 7812c1c6..00000000 --- a/internal/compress/flate/testdata/huffman-shifts.dyn.expect +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-shifts.dyn.expect-noinput b/internal/compress/flate/testdata/huffman-shifts.dyn.expect-noinput Binary files differdeleted file mode 100644 index 7812c1c6..00000000 --- a/internal/compress/flate/testdata/huffman-shifts.dyn.expect-noinput +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-shifts.golden b/internal/compress/flate/testdata/huffman-shifts.golden Binary files differdeleted file mode 100644 index f5133778..00000000 --- a/internal/compress/flate/testdata/huffman-shifts.golden +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-shifts.in b/internal/compress/flate/testdata/huffman-shifts.in deleted file mode 100644 index 7c7a50d1..00000000 --- a/internal/compress/flate/testdata/huffman-shifts.in +++ /dev/null @@ -1,2 +0,0 @@ 
-1010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101
01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010
-2323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232
32323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323
\ No newline at end of file diff --git a/internal/compress/flate/testdata/huffman-shifts.sync.expect b/internal/compress/flate/testdata/huffman-shifts.sync.expect Binary files differdeleted file mode 100644 index 7812c1c6..00000000 --- a/internal/compress/flate/testdata/huffman-shifts.sync.expect +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-shifts.sync.expect-noinput b/internal/compress/flate/testdata/huffman-shifts.sync.expect-noinput Binary files differdeleted file mode 100644 index 7812c1c6..00000000 --- a/internal/compress/flate/testdata/huffman-shifts.sync.expect-noinput +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-shifts.wb.expect b/internal/compress/flate/testdata/huffman-shifts.wb.expect Binary files differdeleted file mode 100644 index 7812c1c6..00000000 --- a/internal/compress/flate/testdata/huffman-shifts.wb.expect +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-shifts.wb.expect-noinput b/internal/compress/flate/testdata/huffman-shifts.wb.expect-noinput Binary files differdeleted file mode 100644 index 7812c1c6..00000000 --- a/internal/compress/flate/testdata/huffman-shifts.wb.expect-noinput +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-text-shift.dyn.expect b/internal/compress/flate/testdata/huffman-text-shift.dyn.expect Binary files differdeleted file mode 100644 index 71ce3aeb..00000000 --- a/internal/compress/flate/testdata/huffman-text-shift.dyn.expect +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-text-shift.dyn.expect-noinput b/internal/compress/flate/testdata/huffman-text-shift.dyn.expect-noinput Binary files differdeleted file mode 100644 index 71ce3aeb..00000000 --- a/internal/compress/flate/testdata/huffman-text-shift.dyn.expect-noinput +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-text-shift.golden b/internal/compress/flate/testdata/huffman-text-shift.golden Binary files differdeleted file mode 100644 index 
ff023114..00000000 --- a/internal/compress/flate/testdata/huffman-text-shift.golden +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-text-shift.in b/internal/compress/flate/testdata/huffman-text-shift.in deleted file mode 100644 index cc5c3ad6..00000000 --- a/internal/compress/flate/testdata/huffman-text-shift.in +++ /dev/null @@ -1,14 +0,0 @@ -//Copyright2009ThGoAuthor.Allrightrrvd.
-//UofthiourccodigovrndbyBSD-tyl
-//licnthtcnbfoundinthLICENSEfil.
-
-pckgmin
-
-import"o"
-
-funcmin(){
- vrb=mk([]byt,65535)
- f,_:=o.Crt("huffmn-null-mx.in")
- f.Writ(b)
-}
-ABCDEFGHIJKLMNOPQRSTUVXxyz!"#¤%&/?"
\ No newline at end of file diff --git a/internal/compress/flate/testdata/huffman-text-shift.sync.expect b/internal/compress/flate/testdata/huffman-text-shift.sync.expect Binary files differdeleted file mode 100644 index 71ce3aeb..00000000 --- a/internal/compress/flate/testdata/huffman-text-shift.sync.expect +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-text-shift.sync.expect-noinput b/internal/compress/flate/testdata/huffman-text-shift.sync.expect-noinput Binary files differdeleted file mode 100644 index 71ce3aeb..00000000 --- a/internal/compress/flate/testdata/huffman-text-shift.sync.expect-noinput +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-text-shift.wb.expect b/internal/compress/flate/testdata/huffman-text-shift.wb.expect Binary files differdeleted file mode 100644 index 71ce3aeb..00000000 --- a/internal/compress/flate/testdata/huffman-text-shift.wb.expect +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-text-shift.wb.expect-noinput b/internal/compress/flate/testdata/huffman-text-shift.wb.expect-noinput Binary files differdeleted file mode 100644 index 71ce3aeb..00000000 --- a/internal/compress/flate/testdata/huffman-text-shift.wb.expect-noinput +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-text.dyn.expect b/internal/compress/flate/testdata/huffman-text.dyn.expect deleted file mode 100644 index d448727c..00000000 --- a/internal/compress/flate/testdata/huffman-text.dyn.expect +++ /dev/null @@ -1 +0,0 @@ -_K0`K0Aasě)^HIɟb_>4
a=-^
1`_ 1 ő:Y-F66!A`aC;ANyr4ߜU!GKС#r:B[G3.LbFRuM]^⇳(#ZivBBH2S]u/ֽWTGnr
\ No newline at end of file diff --git a/internal/compress/flate/testdata/huffman-text.dyn.expect-noinput b/internal/compress/flate/testdata/huffman-text.dyn.expect-noinput deleted file mode 100644 index d448727c..00000000 --- a/internal/compress/flate/testdata/huffman-text.dyn.expect-noinput +++ /dev/null @@ -1 +0,0 @@ -_K0`K0Aasě)^HIɟb_>4
a=-^
1`_ 1 ő:Y-F66!A`aC;ANyr4ߜU!GKС#r:B[G3.LbFRuM]^⇳(#ZivBBH2S]u/ֽWTGnr
\ No newline at end of file diff --git a/internal/compress/flate/testdata/huffman-text.golden b/internal/compress/flate/testdata/huffman-text.golden deleted file mode 100644 index 6d34c61f..00000000 --- a/internal/compress/flate/testdata/huffman-text.golden +++ /dev/null @@ -1,3 +0,0 @@ -AK0xßZLPa!xADI&#IEp]LƿFp 188h$5S- F66!)v.0Y& SN|d2: -t|둍xz9骺Ɏ3 -&&=ôUD=Fu]qUL+>FQYLZofTߵEŴ{Yʶbe
\ No newline at end of file diff --git a/internal/compress/flate/testdata/huffman-text.in b/internal/compress/flate/testdata/huffman-text.in deleted file mode 100644 index 73398b98..00000000 --- a/internal/compress/flate/testdata/huffman-text.in +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package main
-
-import "os"
-
-func main() {
- var b = make([]byte, 65535)
- f, _ := os.Create("huffman-null-max.in")
- f.Write(b)
-}
diff --git a/internal/compress/flate/testdata/huffman-text.sync.expect b/internal/compress/flate/testdata/huffman-text.sync.expect deleted file mode 100644 index d448727c..00000000 --- a/internal/compress/flate/testdata/huffman-text.sync.expect +++ /dev/null @@ -1 +0,0 @@ -_K0`K0Aasě)^HIɟb_>4
a=-^
1`_ 1 ő:Y-F66!A`aC;ANyr4ߜU!GKС#r:B[G3.LbFRuM]^⇳(#ZivBBH2S]u/ֽWTGnr
\ No newline at end of file diff --git a/internal/compress/flate/testdata/huffman-text.sync.expect-noinput b/internal/compress/flate/testdata/huffman-text.sync.expect-noinput deleted file mode 100644 index d448727c..00000000 --- a/internal/compress/flate/testdata/huffman-text.sync.expect-noinput +++ /dev/null @@ -1 +0,0 @@ -_K0`K0Aasě)^HIɟb_>4
a=-^
1`_ 1 ő:Y-F66!A`aC;ANyr4ߜU!GKС#r:B[G3.LbFRuM]^⇳(#ZivBBH2S]u/ֽWTGnr
\ No newline at end of file diff --git a/internal/compress/flate/testdata/huffman-text.wb.expect b/internal/compress/flate/testdata/huffman-text.wb.expect deleted file mode 100644 index d448727c..00000000 --- a/internal/compress/flate/testdata/huffman-text.wb.expect +++ /dev/null @@ -1 +0,0 @@ -_K0`K0Aasě)^HIɟb_>4
a=-^
1`_ 1 ő:Y-F66!A`aC;ANyr4ߜU!GKС#r:B[G3.LbFRuM]^⇳(#ZivBBH2S]u/ֽWTGnr
\ No newline at end of file diff --git a/internal/compress/flate/testdata/huffman-text.wb.expect-noinput b/internal/compress/flate/testdata/huffman-text.wb.expect-noinput deleted file mode 100644 index d448727c..00000000 --- a/internal/compress/flate/testdata/huffman-text.wb.expect-noinput +++ /dev/null @@ -1 +0,0 @@ -_K0`K0Aasě)^HIɟb_>4
a=-^
1`_ 1 ő:Y-F66!A`aC;ANyr4ߜU!GKС#r:B[G3.LbFRuM]^⇳(#ZivBBH2S]u/ֽWTGnr
\ No newline at end of file diff --git a/internal/compress/flate/testdata/huffman-zero.dyn.expect b/internal/compress/flate/testdata/huffman-zero.dyn.expect Binary files differdeleted file mode 100644 index dbe401c5..00000000 --- a/internal/compress/flate/testdata/huffman-zero.dyn.expect +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-zero.dyn.expect-noinput b/internal/compress/flate/testdata/huffman-zero.dyn.expect-noinput Binary files differdeleted file mode 100644 index dbe401c5..00000000 --- a/internal/compress/flate/testdata/huffman-zero.dyn.expect-noinput +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-zero.golden b/internal/compress/flate/testdata/huffman-zero.golden Binary files differdeleted file mode 100644 index 5abdbaff..00000000 --- a/internal/compress/flate/testdata/huffman-zero.golden +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-zero.in b/internal/compress/flate/testdata/huffman-zero.in deleted file mode 100644 index 349be0e6..00000000 --- a/internal/compress/flate/testdata/huffman-zero.in +++ /dev/null @@ -1 +0,0 @@ -00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
\ No newline at end of file diff --git a/internal/compress/flate/testdata/huffman-zero.sync.expect b/internal/compress/flate/testdata/huffman-zero.sync.expect Binary files differdeleted file mode 100644 index dbe401c5..00000000 --- a/internal/compress/flate/testdata/huffman-zero.sync.expect +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-zero.sync.expect-noinput b/internal/compress/flate/testdata/huffman-zero.sync.expect-noinput Binary files differdeleted file mode 100644 index dbe401c5..00000000 --- a/internal/compress/flate/testdata/huffman-zero.sync.expect-noinput +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-zero.wb.expect b/internal/compress/flate/testdata/huffman-zero.wb.expect Binary files differdeleted file mode 100644 index dbe401c5..00000000 --- a/internal/compress/flate/testdata/huffman-zero.wb.expect +++ /dev/null diff --git a/internal/compress/flate/testdata/huffman-zero.wb.expect-noinput b/internal/compress/flate/testdata/huffman-zero.wb.expect-noinput Binary files differdeleted file mode 100644 index dbe401c5..00000000 --- a/internal/compress/flate/testdata/huffman-zero.wb.expect-noinput +++ /dev/null diff --git a/internal/compress/flate/testdata/null-long-match.dyn.expect-noinput b/internal/compress/flate/testdata/null-long-match.dyn.expect-noinput Binary files differdeleted file mode 100644 index 8b92d9fc..00000000 --- a/internal/compress/flate/testdata/null-long-match.dyn.expect-noinput +++ /dev/null diff --git a/internal/compress/flate/testdata/null-long-match.sync.expect-noinput b/internal/compress/flate/testdata/null-long-match.sync.expect-noinput Binary files differdeleted file mode 100644 index 8b92d9fc..00000000 --- a/internal/compress/flate/testdata/null-long-match.sync.expect-noinput +++ /dev/null diff --git a/internal/compress/flate/testdata/null-long-match.wb.expect-noinput b/internal/compress/flate/testdata/null-long-match.wb.expect-noinput Binary files differdeleted file mode 100644 index 
8b92d9fc..00000000 --- a/internal/compress/flate/testdata/null-long-match.wb.expect-noinput +++ /dev/null diff --git a/internal/compress/flate/testdata/partial-block b/internal/compress/flate/testdata/partial-block deleted file mode 100644 index b14e816a..00000000 --- a/internal/compress/flate/testdata/partial-block +++ /dev/null @@ -1 +0,0 @@ -HQ(/I
\ No newline at end of file diff --git a/internal/compress/flate/testdata/regression.zip b/internal/compress/flate/testdata/regression.zip Binary files differdeleted file mode 100644 index 73cf8403..00000000 --- a/internal/compress/flate/testdata/regression.zip +++ /dev/null diff --git a/internal/compress/flate/testdata/tokens.bin b/internal/compress/flate/testdata/tokens.bin deleted file mode 100644 index b93c6968..00000000 --- a/internal/compress/flate/testdata/tokens.bin +++ /dev/null @@ -1,63 +0,0 @@ -<mediawiki xmlns="http://www..org/xml/export-0.3/":xsiw32001/XMLSchema-instance" xsi:sLocation .xsd" version="0.3:lang="en"> - <siteinfo> - name>Wikip</base>끀en./Main_Page</generator>MediaWiki 1.6alpha</cfirst-letter</ <spaces key="-2"退</1">Special0" /ɀ1">Talkŀ2">User3 t4">܂݀5 6">Image7ڀ89 10">Template 1Ӄ 2">Helpހ3ڀ4">Category5 00">Port101Às҈<pag<title>AaA</id>1</idreviԉ32899315mestamp>2005-12-27T18:46:47Z <contribu<userJsmethers</id>614213쁀Ӏ<text xml:É="preserve">#REDIRECT [[AAA]]</怀</</ÂlgeriA҂ǂ18063769ǂ07-03T11:13:13Z ǂDocu802minorcomment>adding cur_id=5: {{R from CamelCase}}ҀԂa]]ԀmericanSamoaɂ69ԃˁ4:1to6 ۂ݂ ނppliedEthics858989432-02-25T15:43:11ip>Con script</i䉀ІAutomated cԀ ethics]] -ccessibleComputing103-0422:18:38Z䀀艀Ams8075Fixing redirect̂_c͂dꉀ9-22T16:02:5ndre Engels300ךɏda programming uage邀narchism2Á42136836-T01:41:25CJames7߆삀83238Α菀/* t Communism */ too many bracketsـ{{րm}} -'''倀m''' originas a term of abuse usgainst early [[workclass]] [[radical]]s includthe [[DiggerofEnglish Revolution]] andsans-culotte|''s''ȀFrencǀ.[Οuk.encarta.msn.com/encyclop_761568770/.html] Whilstis stillin a pejorative waydescribe ''"any act thaƂviolent meanstroy退organizsocietyˀ''<ref>www.cas.sc.edu/socy/faculty/deflem/zhistorintpol၀ History of Intal Police Coopeށon],final protocolsȀConferenceRome forSocial Defense Aۄts, 1898</, it hlso been taken up posilabel by self-definŅ惀ts. 
- -The word '''is [[etymology|derived]][[Greek|Greek]] ''[[Wikary:&#945;57;616345;|ƀ]]'' (without [[s (ruler, chief, king)).m[[polit philosophy]], isbelief''ـs'' are unne𑀂aryshould be aboed, althoughބredifferՈiprets߄whaisچreferrel[[smove]]s)advƩe elimi耀authoritarian institus, particul[[state]].Ņ쨀e쨀DiЀ_of_m] on WȀ, aed 2006 Ճ[[Àۃmost܀ts it, does not imply [[chaos]], [[nihilism]], orԄnomie]], but raă a harmoniouanti-͂]]. In plaۃregardLJsstructures܄coercive ecocƂtsal d upon [[ntsautonom߁individuals, [[mutual aid[[govnce]]. љ -eeasily ̉byit is܀ԃсoffertheyve to truly freHowever, idebhow antety might턀k v臀considerably, esly rts; eǁdisagrebe brou. - -== O푀predecessors == - -[[Peter Kropotkin|os, argua退focorded [[oryhuman was⏀ed oprinciples.叀.쐀[[M䄀Aid: A FactorE'', 1902.Mthropologfollow拀vhunter-gaer bands wƃegallʕd dilrumud weֈdecreew兀d had eq׆resourcȂFriedrich|Freiʂarxنve//܅s/1884/o-family/index撀FamiPrivތProperty,S884Ɂċ -[[:WilliamGodwin.jpg|thumb|right|150px| ]] - -Əty Osډބrray Rothbard|]] findattitude[[TaoisƔ[[HChina|AnciՇ(Toronto)က.''𬀂pril 14]] [[2002܃toxicpop.co.uk/library/tT mirrorgeocitiesژSoHo/5705anɘ Vanity site, 끀lewrockwellr/aׂ-chineseтese LiberґTț退an extr˙Ãmisejournals/jls/9_2_3.pdfceptRol⏀llecChange Toward Laissez FaireҁJ̀́Studies, 9 (2) Fall 1990Ԇ 銀 found similarstoicism|ۄ[Zen CitiumӏAccordoNJ, Zenorepudiomnipotstٲnregi܍oclaimЀs䐀eigntymoral law. bcrayon.jspbritt1910l, wen by 㜀Ea Britannica, 1910][[Anabaptistęof 16th century Eu㊀some𩀄religi͏runnof modernډm. 
[[Bertr傀Russellin ''ۉWestPʆwritesssincey helՃgood will be guiat y mo[[the Holy Spirit]]...[f]preŇy arrive a.|Ҁ'' in ''Aǂ䅀connecȔal circumŀsʥiest nt da1945Υ (True Levrs)|rЃ遀tic durimeCivil War門ׅ̃쇀zpub净notes/aan-ߕه܇t Timeline],ႀɇ94ӑ -Inفeraҁtoto矀ށthingn [[Louis-Armand de Lom d'Arce de Lahontan, Baron|]]Nouveaux voyages dݧl'Amrique septentr㦀e(1703), w he܇䃀NۨUnis|indigenƞ݀had noČlawrisrs▀ppras beye.lib.virginiacgi-local/DHI/dhi.cgi?id=dv1-12 DicۅI玀- ANARCHISM]∀ MeanslʑѬleader IndãM,ۨrepelyat hso[hiscestors䅀1793꺀kȭۢ× pub ''An Enquiryْrning«Justiceweb.bilkent.edu.tr/Onˬe.upennjlynch/Frank/ǘ/pjtpۏ]. A did no䁀˱ޛr҄havᣀbookmajo e錀m Bupoint notyet existt쟀known mainlyult hurl[[bourgeoi[[GirondiӅmࠀݲ elrȲ. - -==TՁ==̜Pierre_Joseph_Proudho՜110px|thumb|left| ]] -{{marticles|[[ր-(ory)]]}} - -Itoit wasn't untilꀀ[[Wɇis?in 1840уÄadopӃ݂p. I 챀iψasoame ŀorÁ̀nswրaӢϟׁ[[thef䁀Iis ⡀oppoЀ (proprit덀ownlete right߆iꇀsh, such as exploik退profit摀 ̀=րp-ⶀ҄|, ޕ슀ڀכ/subject/s//ch03.htmp3. L effi⟀ cadoˀ supporѬhe caꈀ'possesߧ' -✀s canlimiȁƧ, capit㧀nd홀aԞ㎀酀j's 冀] (¯ellisme), involxc̃ӓgroups ctradproducirr usÅ''''Гree amount ofin怀Tۇensuren of薀s. W݆ρly join togeco-ͅ˕shopŷest- bankbt up旀provide͝𫀎䁀샀influentialɀin iƈllowЈɀac̓[[R1848ce.x:⍀ deveʀa numbБs overlifܡofFordetailscuŇ seꁀ쎀|]].'' - -==Max Stirner's EgѪ==嚀ݞEgoIts Own'' stߘno̠ofa, natur〆l ̀-milluՂ ''ghostsՅisay爀tϏHeݹd e܂Ƃam܀amorꉀ刀u'Ԁts' wheirڇdo so. For him낀s伀come鸀ظ:Who奀門stoĀ,defe釀ghimong𥀂܃AقWȄIin my powᄀyɑ. So longIert my쁀alder, I aNJrie - - nȎhimЀ-܅藀 ''. Ntheless,ꇀony-ꃀÀÀ瀀Àdivers==ـ退ڀBenjaminTucker浀ീ ]]қȁۏ1825 [[Josiah Warre렀墀Āip膀π]] experiś hea은RobOweμNew HarmonÀffew years amidst mucˀconflict. blam֫ty'sҀo挀⯀[[Ű. eedoseؓèies聀ڔāܱŞ[[Utopia (y)|ׁ[[MTij33ԁwrotޜPeaceful ist̖Ёѓbperiod退. 톀atـmanεfor羀doctrinwъϺ(''y'' XIV (Dece00):1) becamዀeet߆B. nݍediπԁAugustԾ1908;ideŀnۄist-issئЀ. 
'߀incorpora灀Ճaƀܐorists:'ړ؇љ|Ιalހ;cɒ퀀price|heterodoxics|]][[l㏀桀倀value]]);픀]]'s marke'mԁaя[[Her݊Spencřdom̀ong靀'sՄ́㑀eۛ䀀[[ʔƀȄatƄ-pa|섀ꗀܻ37Pay ⮀y: Sels FWrυ R.Џ,guard Press, York26, Kraus Reހ., Millwood, NY73.ˡ[[ʄҍ∀ʀsysteme abunof 덀petܖԀfŗ҂ݰreceivfullۀ鄀rO 19ɓۈludLysaSpoon[[StepҗPrewʀ[[VìYarrosFÀrna遀alۓakunin鬀㬀ؓ͆ٓ|Mikhail 1814-1876듀ingm݉AӀ܁ፀ́M쓀rsh reon. Twenty64ܟ'݂',띀mÁancurren. Dрo͜genuin鋀nksဂׁsignantstarKarl]] afigȎ܀:шo esucGeneCounciȀ. Ȁo׀to |±twhoʕԄ ShortfЫ[[ᄀrsԦ1868̈ꂀpolariinto two camps,䂀酀ˆƏir얀كsearФí betw〄ကȀyՄڀrfavoured (inʀ'rds)ۀggle逌恀蔀ĀparҀeۀgiʀ A뉀foc瀄onłity. - -遀chaeЀ钀aـiمi쓀aoÂȫendʪȿa[[rul뀈Ȁaڀst쿀|ہՐlcyc̀hp/adpage.php1969 S̀1872ܝ climax逈spliۀtwoۃHague Congې (1872)|iዀ̀c׀ڈҌt_͈_to_m|nˀ˹ꀈdemocracy|tр怂n[[Ӏ''𨀈dnЕğǕlƀof -wĀ鍀s|''.]]|ي}}Ѐ͐䈀ڀژͅ bothNj慀Ձt턀܋숀1870ہd awayҌ'si (ϖcoրހm)embracӜs.˂tǀ〄뀐π׀ive݀Ȅbߐs݈̀need,耄٘nefac.net/node/157݀̀ڃDjacqueperӾ䫀beښ܅ǂ。onbooksbleed/̀Հ/Deۀƀjoseph.d.鵀.fr/ecrits/lettreapjp De l'tre-䀂le et femelle - L P.J嵀] (ـᵀ|]])؊Unlike,ܒ耆䃀䝀͟⊀ʀ to쀆he ̈sfa퉀Ԅhat܆ ma适ire局nnounceπUS pubed ݀ Leaire58-1861). - -P,see뀆impn, outlׇConqueBrend Fiel瀆He fel-Ȁbene喀l䜀,õזĕ耰97). SubsequꂀȜ Emma GoldAlexלBerkman. M̀o-syndߓԘ(w) sawve. Isaac Puent1932ٌorioǀbŲSpanCNT샀manifestot-њ - -Ssliked mergڋm. S煀ꁀaintaބㆀ܁ԩsՀy. exaǀ,wess僀߆ƀzeteticӊmacdebates/apx1pubsl],ր타pseudo./̀agandadeedJohann֑ؐ[[nspok߀ّ̀ʀƀrayȐgerꆀကuihighɀfilts[[riot]]sassaрrre[[teꀂ¢̆[[]]a艀encouragedǛce, ̀Ҁmb]]ШԃÁǀad앀|o furtā退恀en'ޫބڈބ]]'OnegЇo saidрՀɃ䀆quickeφdԀlyw瀄ŀanônere׀ssacҁenempeople musǀ⑀{{fact}} 톀'߀o߄, dynamite, earۿkerD - -ۀno뗀nsensuͣޔݱegitimacyuti၀āÀ.У[[ErricϤ߅stanjӸ逌desirableŀݧsets.ـ蒀me y dن邀. (ǂOn VcӀЄΙĀfNechaevؒΆցidentifiىo-pacif|˂䊀Ԁ[[nonvʞ[[Leo Tolsto;oseˀi哀vieݫ[[Ch۶ʀnotրti逆Ǯʀsee|ǒɍFlaȀ m.sv75px逄red-and-bflag, comӈфɿـҞբy 20局㥀ŅفـӀǀצ鮀ԗ pursuѲindـ[[gkprim؟훀ghǴ - -A[[PԬΠe|1871Հ̀]] ԜrgedՐҀc̀''Bourses뀂TravailՀÆ낀s րunio㋁v ݀[[ConfdrGnle dunfedΚof, CGT)1895Ɓ倒.m쒀Pataud寀ougetကCGT saw֛ř]]ԀԲ҄. Ƀ191׀mÔappe[[Bolshevل. 
-styleaificڶ酀o 1921ŔremߙSpain〈mid0ٓ߶̓Ʉhe Worl(IWW), 1905USo⯀LJЃsᆀt߁ushȿۈπȀ923 100,000 mƀ쒀dֱ́ڀ3匀lici봀ъ毀by rank̬,ըodyׄa shnspiҟAnglophۀCNT_tu_votar_y_ellos_dec⚀Öㅀǀ2004. Reads: Do耂leti܀֕ lives/ You vo͋Јcide/Àaۀ it/y, A藀, Self-manag
む ӈҀLj's,0ŃȌ̎Źssfulۉրcin Nacؾ delʊbajoىډÀur:ڡPцsЀaхhipɄ1.58 millξ934ꀀplay뀀rol薀[[See:ֿ΄ђSހkπRicardo Flores MagnڐkeyߥMex屁ߔatin|Ȁ净exteZapta Arm܃|]] reb倂رŀުory occupin ArgքnaBerlin192늀᳀ـ㌀兀[[ĀContempor̂ˀminu܀즁Ԁ;րsmalle펀灀s, 20s30s.largÊʻȁۍtoda˘ـʇɇNT恀paid-up6njЎȀՆv懲[[sـꒀ ؋ƀӀĀSolidarity AKӇFމׇڝҖĀ쀂熀2Â. 鄀c̀c타Υm愀߆酀ǀۀd뒀瀂ꀂ. PsׅBob Black܀sŀĀΞManԀۛ|ԁ휀 up ӀsĀ ԫۀmain1917ހ߈seiseƆԀ타a倄 كkFebrud OctobersѤԔက߀p۽urnĀǀ ݾiăည밀ch cul𦁄1918 [[KroЁdߍinрim텁r dderĀorԍvрs[[Ukra扁좀ƀ䋀|civil wa퀄Whi뀀̀Makhnovshŗasa䀄N넁]]). - -Expn㙀ïᡁleavׂamongƦʀsponseˉׁc炀ダuing. BothڔicuћiǪ , ㋀Ѐxpoץңthemͩ܀ˀuɀofǀʀloꀂ∀愀ɀresul삀遀넀s䀂u瀀lyꊀvㅀ㌀謀ޓ̭;맀eģew ώUS퀀[[CGTIWWㅀg怆gmselvesǀ瀂ʓm̂|In Ʀɔelo Truda]] p҃͟iles璀ąa㌀n뀌 newϕҀ֮T߀ߖ,耎[[PlatÒ׀ҙӅƄ⯀〆ܕȩ߂茀ɀs. ''̀ـҡ뀂ʀ䙀ނƶဂ'Ā', 'tacʏɀv䊀ib'҅'π'.܂ՁIUK'赀 [[North Eas郀ne遀bCanadaf瀄厀fasAЀCNT-ǵـ-car-؞ဆ270px|Ό1936M聀ڛ脀熀 carءׁр̀vև| ]] -I1920ނ193֭i،Ⱥ퀄ߩs䀀ڀ璀⁂s,׆cffi choiceͼקғp軁Soviet-ݒً쮀Ԅ݄t뽀y? Luigi Fabbri휀Itamωrguӏڃƀހt: - -:Fᅀݳ anꉀꂀ縁,s, u݀Ȁ뀀imaginЀut橀glorےـpހŁڅyݿ펀ʧ׀Ղt怂ڋlŀΚ rio،ҎˀӘрa 'eont'땀րor.univ-montp3.fr/ra_forum/en//berry_david/m_or_᠀ʀ֩Èћtòݣa݁̀byrs Ȁ鉀 ۀЕ36,ʂŀƄЂhelbًbည. Monthꀈဒponˢtˏcoup倀Ʈ-39)w܄repȌ|ތt-䐀fᛀwa͊militia푀rol[[ciΙrcelon߀Щf ruׄÀeyƀ͌zꀂ9los˃Åb耄耆׃ኀ ⚀怈Àɹ͇綀tro܁ Ԁي.倂troops址Ȃۍ݀ecu[[POUM|disnༀr飀Ԁ197ꀁ̀ɐقneo|I̪Kingdoma䑀|߅]]٤y̩Ѓreccombgphys햀ɁrelyݥʀЀ킀9tend适皀USmti-Raː︀US)叀Kfa]]Rʵ뀀ՀLeoɀʀ|ʀ8-1910ՀŖʀ綁Ж -πt cul݀Ăb䃀lifޑout˄ athie䀀u瀎ǃڨۍ쉀臀soroՇlas퀀Ԉ߀di⤀؉Ãф烀⾀̀̈픀ҀerڒЍGodǙ߁؞ʀthly팀рchurcheԀ。Jesus' teagфtic䀆orru֏рideclo鱀ف. π舀ހׂ cheeḱric
рmрԉĄԁҀof Godin You܁ـۦaۅbas镀䀆܀рȆȶĂ⇀ [[yocca[[taxŀ| tax߇Ω[[vege꤀vۘ. - -Հۀ瘁rooȓs ol脀's biŕ[[ᅀxhibڀal. Byobey utte뀀릀Bibl怂Ѐ犀ڇԄsixteenӀӀ'-ЁiǀӀtobeɀoƇቀreje)hier䍀(indсnon倂Ğޭ瘀god߂܀Սitetyp͑beginnڊiŀabal,ဆԁ߂modelsŁң늀ʼnŀ鸀礀ђ ̭Ёi-Xu[[Budd퀀by [[well-fiƁ ̮〆πenvҀ̀똀̈ɞa瀒n䀔뀆min⃀偄StarhawkhoԀeЀtenɿܘi[[aŪfem՚ǔ-4˔|ـa-FӀE뉀[[Jenny d'H ur吀[[Juliette Adam֪À̅[[mysogyn亀րu雀185Ԁ a-fāa恂ўؑ妀Ȅʁpatri]f愀恂roblemډ lԀᎀ֙aہdЁԀáۀ灀'' dǁ70sĀځsallydaoTwo݇ - Two䞀 - W:ӰބtĄ,փ㘀ond-wavł|ҳڀ,䀄̓ފ܀傀πuܣπꀄȅd؍с݀femaؓ爁˲ׁϚy뀆ʀ䁂Ԭ၀Ԋ쀄匀ȁŞȀ ܁cregԀՁׯȁÀˁ쀂偂́Ջ schooaddӜsրǁ[[Eco-Ԉ逌߸➁ހof 恂Āрυ߀ʍ瀚20th-cܧՂ[[Voltairde Cleyƴώ퀀ĆĚMWollstonecraȬotoewsʁ쁀灀̂precursܚItﴀbǃ怀Ӂ삀yŞ脀Ā֕藀̀Miss쁂ist;ist. Sᢀ桀rǁ. I make arileԬ̰߀ωaЁ쀄SÀĀtirlрł亀މ涀alw℀ӢiܪځπҿFre倂men頀。ރf̅.۱ޗrn dayЀ 쌀ƀ뀀䚀׀сgroݴڀof Quiet RumođІ門Ѵto sprʀki♀肀ꀈbro倀. Wendy McElm take☀耀ҁlퟀwebsiteiۀts.net I-Ȭo-펀Ǭ遘 Smile.JPGҬحȀ-1995)}} -ꑀᯀs-ƀ֪ݏ뀖[[fㆀ豈branᩀȄݾt狀݆あҀЀtʀӈogဂynthӦ[[ҭԠ[[Angerʂٗ׀ʀꍁߋց-agś핀epށLawÃC싀́(Ÿ׀David m)ʂøacJan Narvesڔ)ǂm|[[Ayn RanƼNozickݼAՂinleɀ悀⸀ѷ݀ڱۀoπHarЅ̄Ralph Raico]]۶㎀p퀊RـGustde MolႀiAuberon ]] ᝀ|À, ڀ뷀praxeology.net/MR-GM-PSPreɀPρSecu͒sJ. Huston McCulloch, O Pa Seˀ #2 (Rid M. EƁEditor)粁: CeӀMay7란ŀ-harځ|ɂѐ큂ᄀy/1787⁂uԝꁂĂρury''] EcPolytechn灂,rRecherc Epmologie Apple,e au CNRS () OpÊȀspuhת瀄s㜁McKaain; Elkin, Gary; Neal, Dׅ''einfoΘfaq/nd11Ȁ RepEЏd Dɞ檀 Bry׀aѺȋTy FAQ烀5.2]FAQ V11.2''dˀ20,6ᄀ׀耄̂▁́wheۗڀl蘀Ɨ͋٢Ā᐀i⸀b劀ffՀꇁm|G埀m|Eco-ؒĀ̳untĀtakƕhe£Ł.ǡ܍[[́Áڀ[[deepۊȏldviewbƁǀʀ偀[[sрʀˀ퀜ʏearth-䞀ֈ. Of 适Ӡ؏退E!]]ꀀakeあtsitAno셀退[[eōseeި oҋÀaphwǁămɀqƽإelf.P퇀Ș쐀voca reݿpre-usu孀agrЃIہπǁ.is隁chnր門[[alie|πԀᄀ̓ށi݁ڀ癁̀ӖӀဆˀLudˁꀀJean-Js Roau]]. ŀÀext߁S, ͆䀄̌׆sJohn Zerzʗ퀈 &mdash;ۀn߀мvedơԃ'e'ઁuГ-gaҀÀ؋ugh퀊'a薁ă̋큂Ġoffshoots== -ec鱁csyncɟ̀πŇme196Ҁဂ7ꀂʐ荀Ȁ. ̀sダĀlӫnうق쀂𫀂怀ңƯbovᵁHakim Bey.jpeπ|麀*'''y''끂 ()Á곁ъڢal -ŀـǁ聁etc. -Ӄscap醀懀ҁ삀[[id.ׁ耀 weakenÝattachڈrӰƁЁlsues (ƀti-nu쁀)ʀÈȃpec˒⌀׀ҀĂԂс܀eu.삀㊀言րspe퓀҆рoadևhungan렁ɀԀā̀absÀāݸ. Iӑƀˀ߆ҀĀ׀C̀thInc]]㒀gazЗy: A Jʴof Dܯe A߀ᄀفJMcQuinɇ˂srm,͛ħ''AyLefam߀ - ۀ se퀀Յm.ws/post耀倀ƀ.Ȝʀy鑀碀Dž po剀˩ÁާSaulԁۧ,eivӗψǯ눀Lacې''fⵀaƇ퀂stŇӈ̀зtoׁ'܍,ꁂĀɊ혀ρށ rd䀂܆Ɂޮ嘀ȉƁ[[sɀǂ囀nŁmoрЁǁށғ co⁌ֈfԩ巀,݀蚀ssib𰀂 degofҀռorۙb홀ouperubric. 
None끈耂쎀ƈم[[Toddҧ煀[[Gilles Deleuzƀ烀[[Flix Guat✀''Exυ:߇m Cinghouseˁƀ酀܀фȀ̀IÀڛofr텀ߑl̎딁ǀܕ affԀyrryޗڀ㓀nҁ㏀ಀۊڀosoWolfi LԀtreichefȭBonā适ˀϓ JoҮˀ썀鄀TҀon╀ꗁށUSുĀfulΪـ҆Kng߀ Abacuހ΅ߝڅS 'aҍԅеǁw unconnڀexts.刀胀π܀iޅbig 'A'ႀlspunk.org/introͲe/sp001689ːm: IʊMethod?].ҀÇՀ؆ۂaŁÀerparts;ɀ́a wa耄遀逈ƀĀA܁ځsha〄ȍ巀GraebՀj Grubacic]]耂n䁆veۂ́o賁rԤn帀ȁˀۀ, volu܁ߎaidnetƀlcruϿ戀胀endϤifeanўlɀ흀busine܁ʬozovis㬀ǀ𤀄܀gunՄzmagnt/showԇ.cfm?S٣onID=41ItemID=4796]Ҟs==CҀpӉᡀ瀄ˀсĭg《悀ǯuȁ׀ƀmǀݟ⁂eȡϸe ĀځρΡ⁀;ڦԀlgoɛ''cause''chaos,warāˀրغnopoŁʼn|m၂šadvۑe၂e琀 Much effd⁂Ҁhowږ쩀ӫhandleÀ .ރ߁ety֧vereign腀[[ᎀрאˀsubjug郀olor适܍ңƀ[[Ashanti A׀[[Lorenzo Komboa Ervim Mbahیǂt PCaeׇↁcaucasɀɃexpȀiݚ뛁适觀݀, p뀂̀ȯŀƃÉ͖ꈁor ethnߊrvуــՁܝƳòprϥՀ뎀(ťrac̳)l倂epa¯۟Ҁ鳀ǝĝ蘀ꉀŃiӉȀˁnvolܶũہconf᳀˃Ɯ끄oߦ뗀貁ׄhiap熁oaـcԍNeo׆ȡ兀Glob臀Nހ̀Ā䒀mptрcoercg scaƁˀrld Ban퀄TÁOLJ[[G8|GՂEއ̀EЀ Forum偀mbiguermڀǙȂՁɌ̇sـoɀ킀/impeǁɇ(၂ed)ǁreݗ. Os舀̀Փ蔀ׁ̀㷀expans䃀蟀߁ၐsoಀsƜven鵀Par͠lȠ֓߁try胀ӑ-߀outᤀs,ԟ م[[Food Not Bombā简ˁeduhome-ݷneighborhԀm聂/arbit㢀so Ѝ炀ӍasheloldՀTRecmadeasierʀ׀ivaۓяԦÁon-l鸀ߊtcper璀̂ˀԶgift-ϋƁing|퀂ic [[opeurȣprogramމ softwկTƼcyber-ȁ[[GNU䊀LinuxIndyӄݤkժ!-- ***NEEDS SOURCE THAT E-GOLD IS USED BY ANARCHISTS*** [[Pͬcryptograp⊀냀anony菀dig ͆e-goʁLocal Exch၂SƆ҇郀чa钀n콀. --Ԁрbȿwea适efeaـ㴀݀ծۀageꀄsevi〬modulaware.com/a/?m=sњid=0684832720 S Indual -sѣځyptπאۀCypherہܨԀ㢀ԁ]] ([[w)biۀО܁ǀぎˁ̀܁|㙀Ŝ퀂㮀Ԁǂؽ,dome؍ݿዀۚub߀olph BeЁ덀njhڏن߁σ_髀/warŀWрH倀. A loޖłēÀ㰀dParliaIĀ sin , beÀam긁ĀdonȂـ.aolԇvlntryst/hitlerޏVƨۉy Iʠǁ֫HÂ؆ԀȀ傀ϝemphasizeƬyҁǻgardneiΆbҁtr bulletsفūā̀Ӣ''The EthܬV/cs_of_.php悀ׂGeorge H.퀂th]]. (AlsҖOxymorr What?ϐJoe PeacotҀFWoodwor恂技SecꅀꃀӁ냀ao뼀ʀ҉܁''ȡ'' each̀utaoܗ,ŀ瀎y|Ƌۀ̉往Ѿ aπdɀ퀄fcoe¢Dž橀eΟngþݛ̿܀̡̀끀遀̀rtcoaրon-builat leaÀˀsҨ邀adj䮀v.C烀:''Mle:'''끊.Ҁ˄ith ώǮmeـۓbeoÆerick Engelsƴ gh:ADzÀlﮀ؆;by ȀȀƠɫŀ܄byiflbayĀ杀cn ŀ̣alliŁyw虁ʁvait߁ကaЀᒀȀWoulmunl֛Ƈ灂ׁɀۂᏀÈրغށgeois?ve//s/1872/10/ ''On 퀂y''Utopŀ쑁ɘfȀor ƀފיa 튀'nicр܀, Carl Landauꀂ̀unˁ❀唀at 䤁ᆀaʀer evilρ𩀂NjÀce.ꀀa鍀cease iƀڀ܀sabsurdʹٔ[߂|肀傀ՀH߀ꅀIde̍ndÁ(1959) (retrׯȿ܄JanՁ2ց[[2006]Ʌ♀ˍ͌Benjamin Tuckʀy-fꃀ𞀌݀².ׅs Si遄ÃproudlyꄀacterѶ떀[[SԀ|Clasᢀςs[[petitierha绀퀄lumpenprol妁e.gɁekhanovG. 
V؊傀튀pЀ//x슀څů]ހ a 퀈܀ڂen䂀s spoilɥmiddle-dilettante⬀אあъ變ϻ׀e瀂'''Tac˒ߌȌI݀退ŀaÖΓʁÒby '׀ts', '̮ts'ꈀeһg 'tק'Ԝreaucra瀂֨beh螁a dogmfacad̀.ingƀŀs/SI/en/display/20逄Spectacle]됀a 91쀀܂Hypocrisy솀ژiދΡP. d'HС,pinn~suĀne/whm2003/h2ㅀИc衁倀ﻀτӤɃي݄îͶstably [[Pi-Joseph耀|Mikhailހ|ހ쀘[[h䂀ȯˏצ璀πёhdismۋ䃀ʳprejud́9ʭހȀɪ܀ᅀnt-isèr߈ʀinu[[euroricрimpИρۀܿ[[Cercl]].煀steˀ鵀Ғ׆i倌g䀘ŀЀʀtÀ́Ꙁsus㿁 syɀhizbyۘƀéĀĀۀҀꀄfӃtئ߁Ӏlyˀ〄ird̋oāπ灎ր䁀s쌀܄ΖကƁ́() ReanϖStanley G. Pay倆heρ倂gimՀ盀ĀƁegotЖԁƼﴁȩ 逄쀀̀҄|ۈ鄀܅gmu.edu/dŀs/es/bc/spainϨՄo-SЍʁي︀henaߖNoam_chomskÁ̀|am CԲ1928)]]݁ޙݧգɁـrŀЀřcelebrѥɁҀ𘀄剀AӣրՀǀƀ׀ڀly liր賀-avoۀts: - -*сI䁂쇀essguœā fi܉ [[Ursula K. Le Guۊal 䁄 ZinÀ -* r׀րHՀ s֔Ԁ[[Avant-ீr΄icolssell큄Den적tƚ downځpenhageՌ瀂ωemployًĒ❀ˁӀeity)|squa軁ꄀkݎsti끂hrҁ怂܁ataˀЮMfa|Ԁto Naziˑրܰ[[Aπd݁̀so׀ۂgɀÀπ߄𓀂ҀԀ. ցƀtym r̀耀׀ˀlin둀ƀက rock,폀gip hopkǀbec啀g钀̀diumӤɏԀessageрn[[U܁瀂dom|UKis̀[[ˁ̀Хe bͪ䁀粀ȁ[[Dutch|DutŁЂEx]]ھexemplညܷրion. -''detaio-== -ȏŀ(PlҦрadךi¤ exفۜ툀ɀpրage)Āَrrele臀p㚀Ѐmى۔briemmaꡁՁs倂ʀÿ [[Àŀ邀ۇ倆٫۬nihib݀̎ΏĀ܂symbolπism/Links|L̄ڀnÃހMaj䀄tPas灀œᅀʓꏀ==== -*Ā粀딀871)Hayiot86Ӂڀ(1917 ɀ192ɀ⁌R36) (ψ틀珀) -*68,(WTO Mi׀ Conꁄ1999|eetin Seattl999)҂Books=mainǁoksý s߲Lj㨀߈݁콀ᛀb ⁂ڈꀀ톀ԁՄ뉀ǀāǚdmac.pitzerϚt_Aȩves/b죀/godate_ch]Ӏmᒁڀ Essaysg/CWǀPeter K tkMutπAidFEŀꗀ|gutenbergeʀ/4341Wꍀsᣀ?܀360́Rudolf RockŴ҃̂o-Syndւ()|AŐʀǰˀʅǀՊyrǀ.aspMax Stirnကgo As Owdž߀df.ցse/~triad/sÀ/ԀၔԀă䅀ځကdomnowinyouӊւ̂bԁgion/ޞٌ݊fωEnglishӀdiځҁ苀R撀div=font-size: 85%րs/div가ـnǀrcּTm䀂re-ծ -# {{note|bill}}ns52.super-h߀̀~vaz1net//awـ߀flag.t/_p牀etonрх쀈ꁀ_/_boڀ Agāt㭁A - BoքtsYarros-Not[Vi , '' VII1892⌀怀totse큀̃161594߁l ˁ== -ΈoverwhelmϏ捀쀄ۀd⅀m̀s|linkހbď -{{wikie|DeȇՀ}} -*oblogs.] Blogŀy Џ]瀂܀Ź㟀esij؞ioɀs. 
-*HuڀÁ݁rt biosᾁonbleed/g适ryޒDaily B's삀 EncycХia]З/] (。|Àѕʀwl WorkՂld] -Ë란A!̏ ɺs far beyoanageable siz͟い倀ˀʼn裁ᆀۀiրОȀknown ↀ怊ll b灂utinᨀk氁瀄lいCategory:m|*]]ForӀ뒀PՀrintseilosophy -[[ar:́ast:quismubg:shizaƁec -[[dkdeeokiismoeȀڀt쀀݂eu׀fa:fiirglhe:ʀhchizidˀis:StjrsisstefĞ -[[iǁja:ko:lzmanπnnnނpzɃp׀rruscoȂsimpleks绀r 䀀vth:zh:zh-min-nan:Hui-thng-t-ch-g]]</text> - <</page <t。>Afghaany</Ȁid>13</id׀5898948ހmmp>2002-08-27T03:07:44Z</ <ributor <username>Magnusske</4Ҁ<mi䀂/<ent>whoops׀<Þ xml:space="rve">#REDIRECT [[҂GeoȀ쁀92-25T15:43:11ip>Con꽁cript</ipAuto Ԁ -Pű5む50Ɇل1T10:42:35-ril166fix̒DemɅチ715-1730:05ZAxelBoldt2redirect</
\ No newline at end of file diff --git a/internal/compress/flate/token.go b/internal/compress/flate/token.go deleted file mode 100644 index 40fa9454..00000000 --- a/internal/compress/flate/token.go +++ /dev/null @@ -1,379 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package flate - -import ( - "bytes" - "encoding/binary" - "fmt" - "io" - "math" -) - -const ( - // bits 0-16 xoffset = offset - MIN_OFFSET_SIZE, or literal - 16 bits - // bits 16-22 offsetcode - 5 bits - // bits 22-30 xlength = length - MIN_MATCH_LENGTH - 8 bits - // bits 30-32 type 0 = literal 1=EOF 2=Match 3=Unused - 2 bits - lengthShift = 22 - offsetMask = 1<<lengthShift - 1 - typeMask = 3 << 30 - literalType = 0 << 30 - matchType = 1 << 30 - matchOffsetOnlyMask = 0xffff -) - -// The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH) -// is lengthCodes[length - MIN_MATCH_LENGTH] -var lengthCodes = [256]uint8{ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, - 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, - 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, - 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, - 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, - 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, - 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, - 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, - 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, - 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, - 27, 27, 
27, 27, 27, 27, 27, 27, 27, 27, - 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, - 27, 27, 27, 27, 27, 28, -} - -// lengthCodes1 is length codes, but starting at 1. -var lengthCodes1 = [256]uint8{ - 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, - 10, 10, 11, 11, 12, 12, 13, 13, 13, 13, - 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, - 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, - 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, - 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, - 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, - 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, - 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, - 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, - 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, - 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, - 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, - 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, - 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, - 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, - 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, - 28, 28, 28, 28, 28, 29, -} - -var offsetCodes = [256]uint32{ - 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, - 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, -} - -// offsetCodes14 are offsetCodes, but with 14 added. -var offsetCodes14 = [256]uint32{ - 14, 15, 16, 17, 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, - 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, - 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, - 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, - 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, - 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, - 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, - 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, - 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, - 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, - 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, - 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, - 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, -} - -type token uint32 - -type tokens struct { - extraHist [32]uint16 // codes 256->maxnumlit - offHist [32]uint16 // offset codes - litHist [256]uint16 // codes 0->255 - nFilled int - n uint16 // Must be able to contain maxStoreBlockSize - tokens [maxStoreBlockSize + 1]token -} - -func (t *tokens) Reset() { - if t.n == 0 { - return - } - t.n = 0 - t.nFilled = 0 - for i := range t.litHist[:] { - t.litHist[i] = 0 - } - for i := range t.extraHist[:] { - t.extraHist[i] = 0 - } - for i := range t.offHist[:] { - t.offHist[i] = 0 
- } -} - -func (t *tokens) Fill() { - if t.n == 0 { - return - } - for i, v := range t.litHist[:] { - if v == 0 { - t.litHist[i] = 1 - t.nFilled++ - } - } - for i, v := range t.extraHist[:literalCount-256] { - if v == 0 { - t.nFilled++ - t.extraHist[i] = 1 - } - } - for i, v := range t.offHist[:offsetCodeCount] { - if v == 0 { - t.offHist[i] = 1 - } - } -} - -func indexTokens(in []token) tokens { - var t tokens - t.indexTokens(in) - return t -} - -func (t *tokens) indexTokens(in []token) { - t.Reset() - for _, tok := range in { - if tok < matchType { - t.AddLiteral(tok.literal()) - continue - } - t.AddMatch(uint32(tok.length()), tok.offset()&matchOffsetOnlyMask) - } -} - -// emitLiteral writes a literal chunk and returns the number of bytes written. -func emitLiteral(dst *tokens, lit []byte) { - for _, v := range lit { - dst.tokens[dst.n] = token(v) - dst.litHist[v]++ - dst.n++ - } -} - -func (t *tokens) AddLiteral(lit byte) { - t.tokens[t.n] = token(lit) - t.litHist[lit]++ - t.n++ -} - -// from https://stackoverflow.com/a/28730362 -func mFastLog2(val float32) float32 { - ux := int32(math.Float32bits(val)) - log2 := (float32)(((ux >> 23) & 255) - 128) - ux &= -0x7f800001 - ux += 127 << 23 - uval := math.Float32frombits(uint32(ux)) - log2 += ((-0.34484843)*uval+2.02466578)*uval - 0.67487759 - return log2 -} - -// EstimatedBits will return an minimum size estimated by an *optimal* -// compression of the block. 
-// The size of the block -func (t *tokens) EstimatedBits() int { - shannon := float32(0) - bits := int(0) - nMatches := 0 - total := int(t.n) + t.nFilled - if total > 0 { - invTotal := 1.0 / float32(total) - for _, v := range t.litHist[:] { - if v > 0 { - n := float32(v) - shannon += atLeastOne(-mFastLog2(n*invTotal)) * n - } - } - // Just add 15 for EOB - shannon += 15 - for i, v := range t.extraHist[1 : literalCount-256] { - if v > 0 { - n := float32(v) - shannon += atLeastOne(-mFastLog2(n*invTotal)) * n - bits += int(lengthExtraBits[i&31]) * int(v) - nMatches += int(v) - } - } - } - if nMatches > 0 { - invTotal := 1.0 / float32(nMatches) - for i, v := range t.offHist[:offsetCodeCount] { - if v > 0 { - n := float32(v) - shannon += atLeastOne(-mFastLog2(n*invTotal)) * n - bits += int(offsetExtraBits[i&31]) * int(v) - } - } - } - return int(shannon) + bits -} - -// AddMatch adds a match to the tokens. -// This function is very sensitive to inlining and right on the border. -func (t *tokens) AddMatch(xlength uint32, xoffset uint32) { - if debugDeflate { - if xlength >= maxMatchLength+baseMatchLength { - panic(fmt.Errorf("invalid length: %v", xlength)) - } - if xoffset >= maxMatchOffset+baseMatchOffset { - panic(fmt.Errorf("invalid offset: %v", xoffset)) - } - } - oCode := offsetCode(xoffset) - xoffset |= oCode << 16 - - t.extraHist[lengthCodes1[uint8(xlength)]]++ - t.offHist[oCode&31]++ - t.tokens[t.n] = token(matchType | xlength<<lengthShift | xoffset) - t.n++ -} - -// AddMatchLong adds a match to the tokens, potentially longer than max match length. -// Length should NOT have the base subtracted, only offset should. 
-func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) { - if debugDeflate { - if xoffset >= maxMatchOffset+baseMatchOffset { - panic(fmt.Errorf("invalid offset: %v", xoffset)) - } - } - oc := offsetCode(xoffset) - xoffset |= oc << 16 - for xlength > 0 { - xl := xlength - if xl > 258 { - // We need to have at least baseMatchLength left over for next loop. - if xl > 258+baseMatchLength { - xl = 258 - } else { - xl = 258 - baseMatchLength - } - } - xlength -= xl - xl -= baseMatchLength - t.extraHist[lengthCodes1[uint8(xl)]]++ - t.offHist[oc&31]++ - t.tokens[t.n] = token(matchType | uint32(xl)<<lengthShift | xoffset) - t.n++ - } -} - -func (t *tokens) AddEOB() { - t.tokens[t.n] = token(endBlockMarker) - t.extraHist[0]++ - t.n++ -} - -func (t *tokens) Slice() []token { - return t.tokens[:t.n] -} - -// VarInt returns the tokens as varint encoded bytes. -func (t *tokens) VarInt() []byte { - b := make([]byte, binary.MaxVarintLen32*int(t.n)) - var off int - for _, v := range t.tokens[:t.n] { - off += binary.PutUvarint(b[off:], uint64(v)) - } - return b[:off] -} - -// FromVarInt restores t to the varint encoded tokens provided. -// Any data in t is removed. -func (t *tokens) FromVarInt(b []byte) error { - buf := bytes.NewReader(b) - var toks []token - for { - r, err := binary.ReadUvarint(buf) - if err == io.EOF { - break - } - if err != nil { - return err - } - toks = append(toks, token(r)) - } - t.indexTokens(toks) - return nil -} - -// Returns the type of a token -func (t token) typ() uint32 { return uint32(t) & typeMask } - -// Returns the literal of a literal token -func (t token) literal() uint8 { return uint8(t) } - -// Returns the extra offset of a match token -func (t token) offset() uint32 { return uint32(t) & offsetMask } - -func (t token) length() uint8 { return uint8(t >> lengthShift) } - -// Convert length to code. 
-func lengthCode(len uint8) uint8 { return lengthCodes[len] } - -// Returns the offset code corresponding to a specific offset -func offsetCode(off uint32) uint32 { - if false { - if off < uint32(len(offsetCodes)) { - return offsetCodes[off&255] - } else if off>>7 < uint32(len(offsetCodes)) { - return offsetCodes[(off>>7)&255] + 14 - } else { - return offsetCodes[(off>>14)&255] + 28 - } - } - if off < uint32(len(offsetCodes)) { - return offsetCodes[uint8(off)] - } - return offsetCodes14[uint8(off>>7)] -} diff --git a/internal/compress/flate/token_test.go b/internal/compress/flate/token_test.go deleted file mode 100644 index 9070c341..00000000 --- a/internal/compress/flate/token_test.go +++ /dev/null @@ -1,54 +0,0 @@ -package flate - -import ( - "bytes" - "os" - "testing" -) - -type testFatal interface { - Fatal(args ...any) -} - -// loadTestTokens will load test tokens. -// First block from enwik9, varint encoded. -func loadTestTokens(t testFatal) *tokens { - b, err := os.ReadFile("testdata/tokens.bin") - if err != nil { - t.Fatal(err) - } - var tokens tokens - err = tokens.FromVarInt(b) - if err != nil { - t.Fatal(err) - } - return &tokens -} - -func Test_tokens_EstimatedBits(t *testing.T) { - tok := loadTestTokens(t) - // The estimated size, update if method changes. - const expect = 221057 - n := tok.EstimatedBits() - var buf bytes.Buffer - wr := newHuffmanBitWriter(&buf) - wr.writeBlockDynamic(tok, true, nil, true) - if wr.err != nil { - t.Fatal(wr.err) - } - wr.flush() - t.Log("got:", n, "actual:", buf.Len()*8, "(header not part of estimate)") - if n != expect { - t.Error("want:", expect, "bits, got:", n) - } -} - -func Benchmark_tokens_EstimatedBits(b *testing.B) { - tok := loadTestTokens(b) - b.ResetTimer() - // One "byte", one token iteration. 
- b.SetBytes(1) - for i := 0; i < b.N; i++ { - _ = tok.EstimatedBits() - } -} diff --git a/internal/compress/flate/writer_test.go b/internal/compress/flate/writer_test.go deleted file mode 100644 index 01893e50..00000000 --- a/internal/compress/flate/writer_test.go +++ /dev/null @@ -1,544 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package flate - -import ( - "archive/zip" - "bytes" - "compress/flate" - "fmt" - "io" - "math" - "math/rand" - "os" - "runtime" - "strconv" - "strings" - "testing" -) - -func TestWriterMemUsage(t *testing.T) { - testMem := func(t *testing.T, fn func()) { - var before, after runtime.MemStats - runtime.GC() - runtime.ReadMemStats(&before) - fn() - runtime.GC() - runtime.ReadMemStats(&after) - t.Logf("%s: Memory Used: %dKB, %d allocs", t.Name(), (after.HeapInuse-before.HeapInuse)/1024, after.HeapObjects-before.HeapObjects) - } - data := make([]byte, 100000) - t.Run("stateless", func(t *testing.T) { - testMem(t, func() { - StatelessDeflate(io.Discard, data, false, nil) - }) - }) - for level := HuffmanOnly; level <= BestCompression; level++ { - t.Run(fmt.Sprint("level-", level), func(t *testing.T) { - var zr *Writer - var err error - testMem(t, func() { - zr, err = NewWriter(io.Discard, level) - if err != nil { - t.Fatal(err) - } - zr.Write(data) - }) - zr.Close() - }) - } - for level := HuffmanOnly; level <= BestCompression; level++ { - t.Run(fmt.Sprint("stdlib-", level), func(t *testing.T) { - var zr *flate.Writer - var err error - testMem(t, func() { - zr, err = flate.NewWriter(io.Discard, level) - if err != nil { - t.Fatal(err) - } - zr.Write(data) - }) - zr.Close() - }) - } -} - -func TestWriterRegression(t *testing.T) { - data, err := os.ReadFile("testdata/regression.zip") - if err != nil { - t.Fatal(err) - } - for level := HuffmanOnly; level <= BestCompression; level++ { - t.Run(fmt.Sprint("level_", level), 
func(t *testing.T) { - zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data))) - if err != nil { - t.Fatal(err) - } - - for _, tt := range zr.File { - if !strings.HasSuffix(t.Name(), "") { - continue - } - - t.Run(tt.Name, func(t *testing.T) { - if testing.Short() && tt.FileInfo().Size() > 10000 { - t.SkipNow() - } - r, err := tt.Open() - if err != nil { - t.Error(err) - return - } - in, err := io.ReadAll(r) - if err != nil { - t.Error(err) - } - msg := "level " + strconv.Itoa(level) + ":" - buf := new(bytes.Buffer) - fw, err := NewWriter(buf, level) - if err != nil { - t.Fatal(msg + err.Error()) - } - n, err := fw.Write(in) - if n != len(in) { - t.Fatal(msg + "short write") - } - if err != nil { - t.Fatal(msg + err.Error()) - } - err = fw.Close() - if err != nil { - t.Fatal(msg + err.Error()) - } - fr1 := NewReader(buf) - data2, err := io.ReadAll(fr1) - if err != nil { - t.Fatal(msg + err.Error()) - } - if !bytes.Equal(in, data2) { - t.Fatal(msg + "not equal") - } - // Do it again... 
- msg = "level " + strconv.Itoa(level) + " (reset):" - buf.Reset() - fw.Reset(buf) - n, err = fw.Write(in) - if n != len(in) { - t.Fatal(msg + "short write") - } - if err != nil { - t.Fatal(msg + err.Error()) - } - err = fw.Close() - if err != nil { - t.Fatal(msg + err.Error()) - } - fr1 = NewReader(buf) - data2, err = io.ReadAll(fr1) - if err != nil { - t.Fatal(msg + err.Error()) - } - if !bytes.Equal(in, data2) { - t.Fatal(msg + "not equal") - } - }) - } - }) - } -} - -func benchmarkEncoder(b *testing.B, testfile, level, n int) { - b.SetBytes(int64(n)) - buf0, err := os.ReadFile(testfiles[testfile]) - if err != nil { - b.Fatal(err) - } - if len(buf0) == 0 { - b.Fatalf("test file %q has no data", testfiles[testfile]) - } - buf1 := make([]byte, n) - for i := 0; i < n; i += len(buf0) { - if len(buf0) > n-i { - buf0 = buf0[:n-i] - } - copy(buf1[i:], buf0) - } - buf0 = nil - runtime.GC() - w, err := NewWriter(io.Discard, level) - if err != nil { - b.Fatal(err) - } - b.ResetTimer() - b.ReportAllocs() - for i := 0; i < b.N; i++ { - w.Reset(io.Discard) - _, err = w.Write(buf1) - if err != nil { - b.Fatal(err) - } - err = w.Close() - if err != nil { - b.Fatal(err) - } - } -} - -func BenchmarkEncodeDigitsConstant1e4(b *testing.B) { benchmarkEncoder(b, digits, constant, 1e4) } -func BenchmarkEncodeDigitsConstant1e5(b *testing.B) { benchmarkEncoder(b, digits, constant, 1e5) } -func BenchmarkEncodeDigitsConstant1e6(b *testing.B) { benchmarkEncoder(b, digits, constant, 1e6) } -func BenchmarkEncodeDigitsSpeed1e4(b *testing.B) { benchmarkEncoder(b, digits, speed, 1e4) } -func BenchmarkEncodeDigitsSpeed1e5(b *testing.B) { benchmarkEncoder(b, digits, speed, 1e5) } -func BenchmarkEncodeDigitsSpeed1e6(b *testing.B) { benchmarkEncoder(b, digits, speed, 1e6) } -func BenchmarkEncodeDigitsDefault1e4(b *testing.B) { benchmarkEncoder(b, digits, default_, 1e4) } -func BenchmarkEncodeDigitsDefault1e5(b *testing.B) { benchmarkEncoder(b, digits, default_, 1e5) } -func 
BenchmarkEncodeDigitsDefault1e6(b *testing.B) { benchmarkEncoder(b, digits, default_, 1e6) } -func BenchmarkEncodeDigitsCompress1e4(b *testing.B) { benchmarkEncoder(b, digits, compress, 1e4) } -func BenchmarkEncodeDigitsCompress1e5(b *testing.B) { benchmarkEncoder(b, digits, compress, 1e5) } -func BenchmarkEncodeDigitsCompress1e6(b *testing.B) { benchmarkEncoder(b, digits, compress, 1e6) } -func BenchmarkEncodeDigitsSL1e4(b *testing.B) { benchmarkStatelessEncoder(b, digits, 1e4) } -func BenchmarkEncodeDigitsSL1e5(b *testing.B) { benchmarkStatelessEncoder(b, digits, 1e5) } -func BenchmarkEncodeDigitsSL1e6(b *testing.B) { benchmarkStatelessEncoder(b, digits, 1e6) } -func BenchmarkEncodeTwainConstant1e4(b *testing.B) { benchmarkEncoder(b, twain, constant, 1e4) } -func BenchmarkEncodeTwainConstant1e5(b *testing.B) { benchmarkEncoder(b, twain, constant, 1e5) } -func BenchmarkEncodeTwainConstant1e6(b *testing.B) { benchmarkEncoder(b, twain, constant, 1e6) } -func BenchmarkEncodeTwainSpeed1e4(b *testing.B) { benchmarkEncoder(b, twain, speed, 1e4) } -func BenchmarkEncodeTwainSpeed1e5(b *testing.B) { benchmarkEncoder(b, twain, speed, 1e5) } -func BenchmarkEncodeTwainSpeed1e6(b *testing.B) { benchmarkEncoder(b, twain, speed, 1e6) } -func BenchmarkEncodeTwainDefault1e4(b *testing.B) { benchmarkEncoder(b, twain, default_, 1e4) } -func BenchmarkEncodeTwainDefault1e5(b *testing.B) { benchmarkEncoder(b, twain, default_, 1e5) } -func BenchmarkEncodeTwainDefault1e6(b *testing.B) { benchmarkEncoder(b, twain, default_, 1e6) } -func BenchmarkEncodeTwainCompress1e4(b *testing.B) { benchmarkEncoder(b, twain, compress, 1e4) } -func BenchmarkEncodeTwainCompress1e5(b *testing.B) { benchmarkEncoder(b, twain, compress, 1e5) } -func BenchmarkEncodeTwainCompress1e6(b *testing.B) { benchmarkEncoder(b, twain, compress, 1e6) } -func BenchmarkEncodeTwainSL1e4(b *testing.B) { benchmarkStatelessEncoder(b, twain, 1e4) } -func BenchmarkEncodeTwainSL1e5(b *testing.B) { benchmarkStatelessEncoder(b, 
twain, 1e5) } -func BenchmarkEncodeTwainSL1e6(b *testing.B) { benchmarkStatelessEncoder(b, twain, 1e6) } - -func BenchmarkEncodeTwain1024Win1e4(b *testing.B) { benchmarkEncoder(b, twain, oneK, 1e4) } -func BenchmarkEncodeTwain1024Win1e5(b *testing.B) { benchmarkEncoder(b, twain, oneK, 1e5) } -func BenchmarkEncodeTwain1024Win1e6(b *testing.B) { benchmarkEncoder(b, twain, oneK, 1e6) } - -func benchmarkStatelessEncoder(b *testing.B, testfile, n int) { - b.SetBytes(int64(n)) - buf0, err := os.ReadFile(testfiles[testfile]) - if err != nil { - b.Fatal(err) - } - if len(buf0) == 0 { - b.Fatalf("test file %q has no data", testfiles[testfile]) - } - buf1 := make([]byte, n) - for i := 0; i < n; i += len(buf0) { - if len(buf0) > n-i { - buf0 = buf0[:n-i] - } - copy(buf1[i:], buf0) - } - buf0 = nil - runtime.GC() - b.ResetTimer() - b.ReportAllocs() - for i := 0; i < b.N; i++ { - w := NewStatelessWriter(io.Discard) - _, err = w.Write(buf1) - if err != nil { - b.Fatal(err) - } - err = w.Close() - if err != nil { - b.Fatal(err) - } - } -} - -// A writer that fails after N writes. -type errorWriter struct { - N int -} - -func (e *errorWriter) Write(b []byte) (int, error) { - if e.N <= 0 { - return 0, io.ErrClosedPipe - } - e.N-- - return len(b), nil -} - -// Test if errors from the underlying writer is passed upwards. -func TestWriteError(t *testing.T) { - buf := new(bytes.Buffer) - n := 65536 - if !testing.Short() { - n *= 4 - } - for i := 0; i < n; i++ { - fmt.Fprintf(buf, "asdasfasf%d%dfghfgujyut%dyutyu\n", i, i, i) - } - in := buf.Bytes() - // We create our own buffer to control number of writes. 
- copyBuf := make([]byte, 128) - for l := range 10 { - for fail := 1; fail <= 256; fail *= 2 { - // Fail after 'fail' writes - ew := &errorWriter{N: fail} - w, err := NewWriter(ew, l) - if err != nil { - t.Fatalf("NewWriter: level %d: %v", l, err) - } - n, err := copyBuffer(w, bytes.NewBuffer(in), copyBuf) - if err == nil { - t.Fatalf("Level %d: Expected an error, writer was %#v", l, ew) - } - n2, err := w.Write([]byte{1, 2, 2, 3, 4, 5}) - if n2 != 0 { - t.Fatal("Level", l, "Expected 0 length write, got", n) - } - if err == nil { - t.Fatal("Level", l, "Expected an error") - } - err = w.Flush() - if err == nil { - t.Fatal("Level", l, "Expected an error on flush") - } - err = w.Close() - if err == nil { - t.Fatal("Level", l, "Expected an error on close") - } - - w.Reset(io.Discard) - n2, err = w.Write([]byte{1, 2, 3, 4, 5, 6}) - if err != nil { - t.Fatal("Level", l, "Got unexpected error after reset:", err) - } - if n2 == 0 { - t.Fatal("Level", l, "Got 0 length write, expected > 0") - } - if testing.Short() { - return - } - } - } -} - -// Test if errors from the underlying writer is passed upwards. -func TestWriter_Reset(t *testing.T) { - buf := new(bytes.Buffer) - n := 65536 - if !testing.Short() { - n *= 4 - } - for i := 0; i < n; i++ { - fmt.Fprintf(buf, "asdasfasf%d%dfghfgujyut%dyutyu\n", i, i, i) - } - in := buf.Bytes() - for l := range 10 { - if testing.Short() && l > 1 { - continue - } - t.Run(fmt.Sprintf("level-%d", l), func(t *testing.T) { - t.Parallel() - offset := 1 - if testing.Short() { - offset = 256 - } - for ; offset <= 256; offset *= 2 { - // Fail after 'fail' writes - w, err := NewWriter(io.Discard, l) - if err != nil { - t.Fatalf("NewWriter: level %d: %v", l, err) - } - if w.d.fast == nil { - t.Skip("Not Fast...") - return - } - for i := 0; i < (bufferReset-len(in)-offset-maxMatchOffset)/maxMatchOffset; i++ { - // skip ahead to where we are close to wrap around... 
- w.d.fast.Reset() - } - w.d.fast.Reset() - _, err = w.Write(in) - if err != nil { - t.Fatal(err) - } - for range 50 { - // skip ahead again... This should wrap around... - w.d.fast.Reset() - } - w.d.fast.Reset() - - _, err = w.Write(in) - if err != nil { - t.Fatal(err) - } - for range (math.MaxUint32 - bufferReset) / maxMatchOffset { - // skip ahead to where we are close to wrap around... - w.d.fast.Reset() - } - - _, err = w.Write(in) - if err != nil { - t.Fatal(err) - } - err = w.Close() - if err != nil { - t.Fatal(err) - } - } - }) - } -} - -func TestDeterministicL1(t *testing.T) { testDeterministic(1, t) } -func TestDeterministicL2(t *testing.T) { testDeterministic(2, t) } -func TestDeterministicL3(t *testing.T) { testDeterministic(3, t) } -func TestDeterministicL4(t *testing.T) { testDeterministic(4, t) } -func TestDeterministicL5(t *testing.T) { testDeterministic(5, t) } -func TestDeterministicL6(t *testing.T) { testDeterministic(6, t) } -func TestDeterministicL7(t *testing.T) { testDeterministic(7, t) } -func TestDeterministicL8(t *testing.T) { testDeterministic(8, t) } -func TestDeterministicL9(t *testing.T) { testDeterministic(9, t) } -func TestDeterministicL0(t *testing.T) { testDeterministic(0, t) } -func TestDeterministicLM2(t *testing.T) { testDeterministic(-2, t) } - -func testDeterministic(i int, t *testing.T) { - // Test so much we cross a good number of block boundaries. - length := maxStoreBlockSize*30 + 500 - if testing.Short() { - length /= 10 - } - - // Create a random, but compressible stream. - rng := rand.New(rand.NewSource(1)) - t1 := make([]byte, length) - for i := range t1 { - t1[i] = byte(rng.Int63() & 7) - } - - // Do our first encode. - var b1 bytes.Buffer - br := bytes.NewBuffer(t1) - w, err := NewWriter(&b1, i) - if err != nil { - t.Fatal(err) - } - // Use a very small prime sized buffer. 
- cbuf := make([]byte, 787) - _, err = copyBuffer(w, br, cbuf) - if err != nil { - t.Fatal(err) - } - w.Close() - - // We choose a different buffer size, - // bigger than a maximum block, and also a prime. - var b2 bytes.Buffer - cbuf = make([]byte, 81761) - br2 := bytes.NewBuffer(t1) - w2, err := NewWriter(&b2, i) - if err != nil { - t.Fatal(err) - } - _, err = copyBuffer(w2, br2, cbuf) - if err != nil { - t.Fatal(err) - } - w2.Close() - - b1b := b1.Bytes() - b2b := b2.Bytes() - - if !bytes.Equal(b1b, b2b) { - t.Errorf("level %d did not produce deterministic result, result mismatch, len(a) = %d, len(b) = %d", i, len(b1b), len(b2b)) - } - - // Test using io.WriterTo interface. - var b3 bytes.Buffer - br = bytes.NewBuffer(t1) - w, err = NewWriter(&b3, i) - if err != nil { - t.Fatal(err) - } - _, err = br.WriteTo(w) - if err != nil { - t.Fatal(err) - } - w.Close() - - b3b := b3.Bytes() - if !bytes.Equal(b1b, b3b) { - t.Errorf("level %d (io.WriterTo) did not produce deterministic result, result mismatch, len(a) = %d, len(b) = %d", i, len(b1b), len(b3b)) - } -} - -// copyBuffer is a copy of io.CopyBuffer, since we want to support older go versions. -// This is modified to never use io.WriterTo or io.ReaderFrom interfaces. 
-func copyBuffer(dst io.Writer, src io.Reader, buf []byte) (written int64, err error) { - if buf == nil { - buf = make([]byte, 32*1024) - } - for { - nr, er := src.Read(buf) - if nr > 0 { - nw, ew := dst.Write(buf[0:nr]) - if nw > 0 { - written += int64(nw) - } - if ew != nil { - err = ew - break - } - if nr != nw { - err = io.ErrShortWrite - break - } - } - if er == io.EOF { - break - } - if er != nil { - err = er - break - } - } - return written, err -} - -func BenchmarkCompressAllocations(b *testing.B) { - payload := []byte(strings.Repeat("Tiny payload", 20)) - for j := -2; j <= 9; j++ { - b.Run("level("+strconv.Itoa(j)+")", func(b *testing.B) { - b.Run("flate", func(b *testing.B) { - b.ReportAllocs() - - for i := 0; i < b.N; i++ { - w, err := NewWriter(io.Discard, j) - if err != nil { - b.Fatal(err) - } - w.Write(payload) - w.Close() - } - }) - }) - } -} - -func BenchmarkCompressAllocationsSingle(b *testing.B) { - payload := []byte(strings.Repeat("Tiny payload", 20)) - const level = 2 - b.Run("flate", func(b *testing.B) { - b.ReportAllocs() - - for i := 0; i < b.N; i++ { - w, err := NewWriter(io.Discard, level) - if err != nil { - b.Fatal(err) - } - w.Write(payload) - w.Close() - } - }) -} diff --git a/internal/compress/internal/doc.go b/internal/compress/internal/doc.go deleted file mode 100644 index b28bad09..00000000 --- a/internal/compress/internal/doc.go +++ /dev/null @@ -1,2 +0,0 @@ -// Package internal provides utilities internal to the compression library. -package internal diff --git a/internal/compress/internal/fuzz/helpers.go b/internal/compress/internal/fuzz/helpers.go deleted file mode 100644 index 71332ac6..00000000 --- a/internal/compress/internal/fuzz/helpers.go +++ /dev/null @@ -1,218 +0,0 @@ -// Copyright (c) 2024+ Klaus Post. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package fuzz provides a way to add test cases to a testing.F instance from a zip file. 
-package fuzz - -import ( - "archive/zip" - "bytes" - "encoding/binary" - "fmt" - "go/ast" - "go/parser" - "go/token" - "io" - "os" - "strconv" - "testing" -) - -type InputType uint8 - -const ( - // TypeRaw indicates that files are raw bytes. - TypeRaw InputType = iota - // TypeGoFuzz indicates files are from Go Fuzzer. - TypeGoFuzz - // TypeOSSFuzz indicates that files are from OSS fuzzer with size before data. - TypeOSSFuzz -) - -// AddFromZip will read the supplied zip and add all as corpus for f. -// Byte slices only. -func AddFromZip(f *testing.F, filename string, t InputType, short bool) { - file, err := os.Open(filename) - if err != nil { - f.Fatal(err) - } - fi, err := file.Stat() - if fi == nil { - return - } - - if err != nil { - f.Fatal(err) - } - zr, err := zip.NewReader(file, fi.Size()) - if err != nil { - f.Fatal(err) - } - for i, file := range zr.File { - if short && i%10 != 0 { - continue - } - rc, err := file.Open() - if err != nil { - f.Fatal(err) - } - - b, err := io.ReadAll(rc) - if err != nil { - f.Fatal(err) - } - rc.Close() - t := t - if t == TypeOSSFuzz { - t = TypeRaw // Fallback - if len(b) >= 4 { - sz := binary.BigEndian.Uint32(b) - if sz <= uint32(len(b))-4 { - f.Add(b[4 : 4+sz]) - continue - } - } - } - - if bytes.HasPrefix(b, []byte("go test fuzz")) { - t = TypeGoFuzz - } else { - t = TypeRaw - } - - if t == TypeRaw { - f.Add(b) - continue - } - vals, err := unmarshalCorpusFile(b) - if err != nil { - f.Fatal(err) - } - for _, v := range vals { - f.Add(v) - } - } -} - -// ReturnFromZip will read the supplied zip and add all as corpus for f. -// Byte slices only. 
-func ReturnFromZip(tb testing.TB, filename string, t InputType, fn func([]byte)) { - file, err := os.Open(filename) - if err != nil { - tb.Fatal(err) - } - fi, err := file.Stat() - if fi == nil { - return - } - if err != nil { - tb.Fatal(err) - } - zr, err := zip.NewReader(file, fi.Size()) - if err != nil { - tb.Fatal(err) - } - for _, file := range zr.File { - rc, err := file.Open() - if err != nil { - tb.Fatal(err) - } - - b, err := io.ReadAll(rc) - if err != nil { - tb.Fatal(err) - } - rc.Close() - t := t - if t == TypeOSSFuzz { - t = TypeRaw // Fallback - if len(b) >= 4 { - sz := binary.BigEndian.Uint32(b) - if sz <= uint32(len(b))-4 { - fn(b[4 : 4+sz]) - continue - } - } - } - - if bytes.HasPrefix(b, []byte("go test fuzz")) { - t = TypeGoFuzz - } else { - t = TypeRaw - } - - if t == TypeRaw { - fn(b) - continue - } - vals, err := unmarshalCorpusFile(b) - if err != nil { - tb.Fatal(err) - } - for _, v := range vals { - fn(v) - } - } -} - -// unmarshalCorpusFile decodes corpus bytes into their respective values. 
-func unmarshalCorpusFile(b []byte) ([][]byte, error) { - if len(b) == 0 { - return nil, fmt.Errorf("cannot unmarshal empty string") - } - lines := bytes.Split(b, []byte("\n")) - if len(lines) < 2 { - return nil, fmt.Errorf("must include version and at least one value") - } - vals := make([][]byte, 0, len(lines)-1) - for _, line := range lines[1:] { - line = bytes.TrimSpace(line) - if len(line) == 0 { - continue - } - v, err := parseCorpusValue(line) - if err != nil { - return nil, fmt.Errorf("malformed line %q: %v", line, err) - } - vals = append(vals, v) - } - return vals, nil -} - -// parseCorpusValue -func parseCorpusValue(line []byte) ([]byte, error) { - fs := token.NewFileSet() - expr, err := parser.ParseExprFrom(fs, "(test)", line, 0) - if err != nil { - return nil, err - } - call, ok := expr.(*ast.CallExpr) - if !ok { - return nil, fmt.Errorf("expected call expression") - } - if len(call.Args) != 1 { - return nil, fmt.Errorf("expected call expression with 1 argument; got %d", len(call.Args)) - } - arg := call.Args[0] - - if arrayType, ok := call.Fun.(*ast.ArrayType); ok { - if arrayType.Len != nil { - return nil, fmt.Errorf("expected []byte or primitive type") - } - elt, ok := arrayType.Elt.(*ast.Ident) - if !ok || elt.Name != "byte" { - return nil, fmt.Errorf("expected []byte") - } - lit, ok := arg.(*ast.BasicLit) - if !ok || lit.Kind != token.STRING { - return nil, fmt.Errorf("string literal required for type []byte") - } - s, err := strconv.Unquote(lit.Value) - if err != nil { - return nil, err - } - return []byte(s), nil - } - return nil, fmt.Errorf("expected []byte") -} diff --git a/internal/compress/internal/le/le.go b/internal/compress/internal/le/le.go deleted file mode 100644 index 890ba873..00000000 --- a/internal/compress/internal/le/le.go +++ /dev/null @@ -1,6 +0,0 @@ -// Package le provides fast little endian integer routines. 
-package le - -type Indexer interface { - int | int8 | int16 | int32 | int64 | uint | uint8 | uint16 | uint32 | uint64 -} diff --git a/internal/compress/internal/le/unsafe_disabled.go b/internal/compress/internal/le/unsafe_disabled.go deleted file mode 100644 index 4f2a0d8c..00000000 --- a/internal/compress/internal/le/unsafe_disabled.go +++ /dev/null @@ -1,42 +0,0 @@ -//go:build !(amd64 || arm64 || ppc64le || riscv64) || nounsafe || purego || appengine - -package le - -import ( - "encoding/binary" -) - -// Load8 will load from b at index i. -func Load8[I Indexer](b []byte, i I) byte { - return b[i] -} - -// Load16 will load from b at index i. -func Load16[I Indexer](b []byte, i I) uint16 { - return binary.LittleEndian.Uint16(b[i:]) -} - -// Load32 will load from b at index i. -func Load32[I Indexer](b []byte, i I) uint32 { - return binary.LittleEndian.Uint32(b[i:]) -} - -// Load64 will load from b at index i. -func Load64[I Indexer](b []byte, i I) uint64 { - return binary.LittleEndian.Uint64(b[i:]) -} - -// Store16 will store v at b. -func Store16(b []byte, v uint16) { - binary.LittleEndian.PutUint16(b, v) -} - -// Store32 will store v at b. -func Store32(b []byte, v uint32) { - binary.LittleEndian.PutUint32(b, v) -} - -// Store64 will store v at b. -func Store64[I Indexer](b []byte, i I, v uint64) { - binary.LittleEndian.PutUint64(b[i:], v) -} diff --git a/internal/compress/internal/le/unsafe_enabled.go b/internal/compress/internal/le/unsafe_enabled.go deleted file mode 100644 index b47fd0db..00000000 --- a/internal/compress/internal/le/unsafe_enabled.go +++ /dev/null @@ -1,52 +0,0 @@ -// We enable 64 bit LE platforms: - -//go:build (amd64 || arm64 || ppc64le || riscv64) && !nounsafe && !purego && !appengine - -package le - -import ( - "unsafe" -) - -// Load8 will load from b at index i. 
-func Load8[I Indexer](b []byte, i I) byte { - // return binary.LittleEndian.Uint16(b[i:]) - // return *(*uint16)(unsafe.Pointer(&b[i])) - return *(*byte)(unsafe.Add(unsafe.Pointer(unsafe.SliceData(b)), i)) -} - -// Load16 will load from b at index i. -func Load16[I Indexer](b []byte, i I) uint16 { - // return binary.LittleEndian.Uint16(b[i:]) - // return *(*uint16)(unsafe.Pointer(&b[i])) - return *(*uint16)(unsafe.Add(unsafe.Pointer(unsafe.SliceData(b)), i)) -} - -// Load32 will load from b at index i. -func Load32[I Indexer](b []byte, i I) uint32 { - // return binary.LittleEndian.Uint32(b[i:]) - // return *(*uint32)(unsafe.Pointer(&b[i])) - return *(*uint32)(unsafe.Add(unsafe.Pointer(unsafe.SliceData(b)), i)) -} - -// Load64 will load from b at index i. -func Load64[I Indexer](b []byte, i I) uint64 { - // return binary.LittleEndian.Uint64(b[i:]) - // return *(*uint64)(unsafe.Pointer(&b[i])) - return *(*uint64)(unsafe.Add(unsafe.Pointer(unsafe.SliceData(b)), i)) -} - -// Store16 will store v at b. -func Store16(b []byte, v uint16) { - *(*uint16)(unsafe.Pointer(unsafe.SliceData(b))) = v -} - -// Store32 will store v at b. -func Store32(b []byte, v uint32) { - *(*uint32)(unsafe.Pointer(unsafe.SliceData(b))) = v -} - -// Store64 will store v at b[i:]. -func Store64[I Indexer](b []byte, i I, v uint64) { - *(*uint64)(unsafe.Add(unsafe.Pointer(unsafe.SliceData(b)), i)) = v -} diff --git a/internal/compress/zlib/reader.go b/internal/compress/zlib/reader.go index f58a904a..74357525 100644 --- a/internal/compress/zlib/reader.go +++ b/internal/compress/zlib/reader.go @@ -34,12 +34,14 @@ and to read that data back: package zlib import ( + "bytes" "encoding/binary" "errors" "hash" "io" - "lindenii.org/go/furgit/internal/compress/flate" + "github.com/klauspost/compress/flate" + "lindenii.org/go/lgo/intconv" "lindenii.org/go/lgo/sync" ) @@ -74,6 +76,7 @@ type Reader struct { trailerRead uint64 err error scratch [4]byte + br bytes.Reader } // NewReader creates a new ReadCloser. 
@@ -99,6 +102,23 @@ func NewReaderDict(r io.Reader, dict []byte) (*Reader, error) { return z, nil } +// NewReaderBytes is like [NewReader] but reads directly from payload, +// reusing a [bytes.Reader] pooled with the returned Reader +// instead of allocating a fresh one per call. +// It is the caller's responsibility to call Close on the ReadCloser when done. +func NewReaderBytes(payload []byte) (*Reader, error) { + z := readerPool.Get() + + z.br.Reset(payload) + + err := z.reset(&z.br, nil) + if err != nil { + return nil, err + } + + return z, nil +} + // Read decompresses bytes from receiver into p. func (z *Reader) Read(p []byte) (int, error) { if z.err != nil { @@ -167,6 +187,7 @@ func (z *Reader) Close() error { return z.err } + z.br.Reset(nil) readerPool.Put(z) return nil diff --git a/internal/compress/zlib/reader_reset.go b/internal/compress/zlib/reader_reset.go index b1b9afa6..6a9340c2 100644 --- a/internal/compress/zlib/reader_reset.go +++ b/internal/compress/zlib/reader_reset.go @@ -10,14 +10,15 @@ import ( "errors" "io" + "github.com/klauspost/compress/flate" + "lindenii.org/go/furgit/internal/adler32" - "lindenii.org/go/furgit/internal/compress/flate" "lindenii.org/go/lgo/intconv" ) // reset resets receiver to read a new zlib stream. 
func (z *Reader) reset(r io.Reader, dict []byte) error { - *z = Reader{decompressor: z.decompressor} + *z = Reader{decompressor: z.decompressor, digest: z.digest, br: z.br} var input flate.Reader if fr, ok := r.(flate.Reader); ok { @@ -95,7 +96,11 @@ func (z *Reader) reset(r io.Reader, dict []byte) error { return z.err } - z.digest = adler32.New() + if z.digest == nil { + z.digest = adler32.New() + } else { + z.digest.Reset() + } return nil } diff --git a/internal/compress/zlib/writer.go b/internal/compress/zlib/writer.go index 98053e71..0fcb2ca8 100644 --- a/internal/compress/zlib/writer.go +++ b/internal/compress/zlib/writer.go @@ -10,7 +10,8 @@ import ( "hash" "io" - "lindenii.org/go/furgit/internal/compress/flate" + "github.com/klauspost/compress/flate" + "lindenii.org/go/lgo/sync" ) diff --git a/internal/compress/zlib/writer_header.go b/internal/compress/zlib/writer_header.go index e23e73d6..00407f49 100644 --- a/internal/compress/zlib/writer_header.go +++ b/internal/compress/zlib/writer_header.go @@ -7,8 +7,9 @@ package zlib import ( "encoding/binary" + "github.com/klauspost/compress/flate" + "lindenii.org/go/furgit/internal/adler32" - "lindenii.org/go/furgit/internal/compress/flate" ) // writeHeader writes the ZLIB header. 
diff --git a/internal/format/packfile/delta/apply.go b/internal/format/packfile/delta/apply.go index 4210d1b3..656a69d0 100644 --- a/internal/format/packfile/delta/apply.go +++ b/internal/format/packfile/delta/apply.go @@ -106,7 +106,7 @@ func parseCopyOperand(delta []byte, pos *int, op byte, firstBit uint, count int) value := 0 for i := range count { - if op&(1<<(firstBit+uint(i))) == 0 { + if op&(1<<(firstBit+uint(i))) == 0 { //nolint:gosec continue } diff --git a/internal/format/packidx/bloom/bloom.go b/internal/format/packidx/bloom/bloom.go new file mode 100644 index 00000000..b3fc2f7c --- /dev/null +++ b/internal/format/packidx/bloom/bloom.go @@ -0,0 +1,180 @@ +package bloom + +import ( + "bytes" + "encoding/binary" + "errors" + "fmt" + "math/bits" + + "lindenii.org/go/furgit/object/id" +) + +// ErrMalformedBloomFilter reports that +// a Bloom filter is truncated, +// has a bad signature, version, or hash function, +// or has inconsistent parameters. +var ErrMalformedBloomFilter = errors.New("internal/format/packidx/bloom: malformed bloom filter") + +const ( + signature = 0x4944424c // "IDBL" + version = 1 + + // HeaderLen is the fixed header length in octets, + // i.e., the signature, version, hash function identifier, + // B, K, and the trailing zero padding. + HeaderLen = 64 + + // BucketLen is the length of one bucket in octets, + // chosen to match the most common cache-line size. + BucketLen = 64 + + // wordBits is the bit width of one bucket word. + wordBits = 64 + + // fieldBits is the width of one in-bucket position field. + fieldBits = 9 +) + +// checkParams validates the filter parameters +// against one object hash size, +// returning log2(bucketCount) on success. 
+func checkParams(bucketCount uint32, k uint16, hashSize int) (uint, error) { + switch { + case bucketCount == 0 || bucketCount&(bucketCount-1) != 0: + return 0, errors.New("bucket count not a nonzero power of two") //nolint:err113 + case k == 0: + return 0, errors.New("zero probe count") //nolint:err113 + } + + log2B := uint(bits.TrailingZeros32(bucketCount)) //nolint:gosec + if log2B+fieldBits*uint(k) > uint(hashSize)*8 { //nolint:gosec + return 0, errors.New("parameters exceed hash length") //nolint:err113 + } + + return log2B, nil +} + +// hashFunctionID returns the on-disk hash function identifier +// for one object format. +func hashFunctionID(objectFormat id.ObjectFormat) (uint32, error) { + switch objectFormat { + case id.ObjectFormatSHA1: + return 1, nil + case id.ObjectFormatSHA256: + return 2, nil + case id.ObjectFormatUnknown: + } + + return 0, id.ErrInvalidObjectFormat +} + +// Bloom is a parsed blocked Bloom filter view over borrowed bytes. +// +// Labels: Deps-Borrowed, Life-Parent, MT-Safe. +type Bloom struct { + // data is the entire filter payload. + data []byte + + // buckets is the bucket region, between the header and the trailer. + buckets []byte + + // objectFormat is the filter's object format. + objectFormat id.ObjectFormat + + // log2B is the base-2 logarithm of the bucket count, + // i.e. the number of leading object ID bits that select a bucket. + log2B uint + + // k is the number of bits set and tested per object ID. + k int +} + +// Parse parses one Bloom filter from data. +// +// Labels: Deps-Borrowed, Life-Parent. 
+func Parse(data []byte, objectFormat id.ObjectFormat) (Bloom, error) { + var zero Bloom + + wantHashID, err := hashFunctionID(objectFormat) + if err != nil { + return zero, err + } + + hashSize := objectFormat.Size() + + if len(data) < HeaderLen { + return zero, fmt.Errorf("%w: truncated", ErrMalformedBloomFilter) + } + + if binary.BigEndian.Uint32(data) != signature { + return zero, fmt.Errorf("%w: bad signature", ErrMalformedBloomFilter) + } + + if binary.BigEndian.Uint32(data[4:]) != version { + return zero, fmt.Errorf("%w: unsupported version", ErrMalformedBloomFilter) + } + + if binary.BigEndian.Uint32(data[8:]) != wantHashID { + return zero, fmt.Errorf("%w: hash function mismatch", ErrMalformedBloomFilter) + } + + bucketCount := binary.BigEndian.Uint32(data[12:]) + k := binary.BigEndian.Uint16(data[16:]) + + for _, octet := range data[18:HeaderLen] { + if octet != 0 { + return zero, fmt.Errorf("%w: nonzero padding", ErrMalformedBloomFilter) + } + } + + log2B, err := checkParams(bucketCount, k, hashSize) + if err != nil { + return zero, fmt.Errorf("%w: %w", ErrMalformedBloomFilter, err) + } + + want := uint64(HeaderLen) + uint64(BucketLen)*uint64(bucketCount) + 2*uint64(hashSize) //#nosec G115 + if uint64(len(data)) != want { + return zero, fmt.Errorf("%w: file size disagrees with bucket count", ErrMalformedBloomFilter) + } + + return Bloom{ + data: data, + buckets: data[HeaderLen : len(data)-2*hashSize], + objectFormat: objectFormat, + log2B: log2B, + k: int(k), + }, nil +} + +// PackHash returns the pack hash recorded in the filter trailer. +// +// Labels: Life-Parent, Mut-No. +func (f *Bloom) PackHash() []byte { + hashSize := f.objectFormat.Size() + end := len(f.data) - hashSize + + return f.data[end-hashSize : end] +} + +// Verify recomputes the filter's trailing checksum and reports any mismatch. +// +// Verify reads the whole filter, +// so callers should treat it as a deliberate integrity check +// rather than part of the open path. 
+func (f *Bloom) Verify() error { + hashImpl, err := f.objectFormat.New() + if err != nil { + return fmt.Errorf("internal/format/packidx/bloom: %w", err) + } + + checksumOff := len(f.data) - f.objectFormat.Size() + + _, _ = hashImpl.Write(f.data[:checksumOff]) + + if !bytes.Equal(hashImpl.Sum(nil), f.data[checksumOff:]) { + return fmt.Errorf("%w: checksum mismatch", ErrMalformedBloomFilter) + } + + return nil +} diff --git a/internal/format/packidx/bloom/bloom_test.go b/internal/format/packidx/bloom/bloom_test.go new file mode 100644 index 00000000..bcfb4419 --- /dev/null +++ b/internal/format/packidx/bloom/bloom_test.go @@ -0,0 +1,159 @@ +package bloom_test + +import ( + "encoding/binary" + "errors" + "testing" + + "lindenii.org/go/furgit/internal/format/packidx/bloom" + "lindenii.org/go/furgit/object/id" +) + +func validFilter(t *testing.T, format id.ObjectFormat) []byte { + // TODO: maybe testgit should have something like this? + t.Helper() + + builder, err := bloom.NewBuilder(format, 4, 2, make([]byte, format.Size())) + if err != nil { + t.Fatal(err) + } + + return builder.Bytes() +} + +func otherFormat(t *testing.T, format id.ObjectFormat) id.ObjectFormat { + t.Helper() + + for _, candidate := range id.SupportedObjectFormats() { + if candidate != format { + return candidate + } + } + + t.Skip("only one supported object format") + + return id.ObjectFormatUnknown +} + +func TestParseValid(t *testing.T) { + t.Parallel() + + for _, format := range id.SupportedObjectFormats() { + t.Run(format.String(), func(t *testing.T) { + t.Parallel() + + _, err := bloom.Parse(validFilter(t, format), format) + if err != nil { + t.Fatalf("Parse rejected a valid filter: %v", err) + } + }) + } +} + +func TestParseMalformed(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + mangle func(data []byte) []byte + }{ + {"truncated", func(data []byte) []byte { return data[:bloom.HeaderLen-1] }}, + {"bad signature", func(data []byte) []byte { + data[0] ^= 0xff + + return 
data + }}, + {"bad version", func(data []byte) []byte { + binary.BigEndian.PutUint32(data[4:], 99) + + return data + }}, + {"non power of two", func(data []byte) []byte { + binary.BigEndian.PutUint32(data[12:], 3) + + return data + }}, + {"zero probe count", func(data []byte) []byte { + binary.BigEndian.PutUint16(data[16:], 0) + + return data + }}, + {"parameters exceed hash", func(data []byte) []byte { + binary.BigEndian.PutUint32(data[12:], 1<<31) + binary.BigEndian.PutUint16(data[16:], 30) + + return data + }}, + {"nonzero padding", func(data []byte) []byte { + data[20] = 1 + + return data + }}, + {"size disagrees", func(data []byte) []byte { return data[:len(data)-1] }}, + } + + for _, format := range id.SupportedObjectFormats() { + t.Run(format.String(), func(t *testing.T) { + t.Parallel() + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + data := tc.mangle(append([]byte(nil), validFilter(t, format)...)) + + _, err := bloom.Parse(data, format) + if !errors.Is(err, bloom.ErrMalformedBloomFilter) { + t.Fatalf("Parse error = %v, want ErrMalformedBloomFilter", err) + } + }) + } + }) + } +} + +// TestVerifyDetectsCorruption checks that Verify accepts a sound filter +// and rejects one whose bucket bytes have been altered. 
+func TestVerifyDetectsCorruption(t *testing.T) { + t.Parallel() + + for _, format := range id.SupportedObjectFormats() { + t.Run(format.String(), func(t *testing.T) { + t.Parallel() + + data := validFilter(t, format) + + filter, err := bloom.Parse(data, format) + if err != nil { + t.Fatal(err) + } + + err = filter.Verify() + if err != nil { + t.Fatalf("Verify on a sound filter: %v", err) + } + + data[bloom.HeaderLen] ^= 0xff + + err = filter.Verify() + if !errors.Is(err, bloom.ErrMalformedBloomFilter) { + t.Fatalf("Verify error = %v, want ErrMalformedBloomFilter", err) + } + }) + } +} + +func TestParseHashMismatch(t *testing.T) { + t.Parallel() + + for _, format := range id.SupportedObjectFormats() { + t.Run(format.String(), func(t *testing.T) { + t.Parallel() + + _, err := bloom.Parse(validFilter(t, format), otherFormat(t, format)) + if !errors.Is(err, bloom.ErrMalformedBloomFilter) { + t.Fatalf("Parse error = %v, want ErrMalformedBloomFilter", err) + } + }) + } +} diff --git a/internal/format/packidx/bloom/doc.go b/internal/format/packidx/bloom/doc.go new file mode 100644 index 00000000..06ca57cd --- /dev/null +++ b/internal/format/packidx/bloom/doc.go @@ -0,0 +1,138 @@ +// Package bloom provides a blocked Bloom filter +// for pack indexes. +// +// A filter answers, from a single cache-line-sized read, +// whether an object ID is definitely absent from the index it covers. +// A lookup that must consult many packs +// can then skip the full binary search +// in every pack whose filter rejects the object, +// decreasing the cost of misses. +// +// # Rationale +// +// Especially for server-side usage, +// repacking is expensive, +// and creating multi-pack-indexes is still rather expensive. +// Incremental multi-pack-indexes partially solve this, +// but having too many of them defeats the purpose, +// since the indexes must still be walked in order +// while performing expensive lookups. 
+// +// Instead, each multi-pack-index layer, +// and each ordinary pack index, +// may carry its own filter. +// The indexes are still traversed in their usual order, +// but the first step when traversing one +// is to check whether it could possibly hold the wanted object. +// +// The filter is split into 64-octet buckets, +// matching the most common cache-line size. +// Some bits of the object ID choose the bucket, +// and the rest choose several bit positions inside it, +// so a lookup reads one 64-octet bucket +// and checks whether all required bits are set. +// +// # Parameters +// +// A filter is parameterized by +// the number of buckets B +// and the number of bits set and tested per object ID, K. +// All integers in the format are big endian. +// The object ID is interpreted as a big-endian bitstring, +// where bit offset 0 is the most significant bit of octet 0. +// B must be a nonzero power of two, +// K must be nonzero, +// and log2(B) + 9*K must not exceed the hash length in bits. +// +// # File format +// +// A filter file is a 64-octet header, +// then B buckets of 64 octets each, +// then a two-hash trailer: +// +// - 4-octet signature: {'I', 'D', 'B', 'L'}. +// - 4-octet version identifier (= 1). +// - 4-octet object hash algorithm identifier +// (= 1 for SHA-1, 2 for SHA-256). +// - 4-octet B, the number of buckets. +// - 2-octet K, the number of bits set and tested per object ID. +// - 46-octet padding, which must be all zero. +// - B buckets of 64 octets each. +// - the pack trailer hash, which binds the filter to its pack. +// - the checksum of everything before it, over the filter's hash function. +// +// The hash length is that of the object format, +// so the trailer is 2 hashes wide +// and the file size is exactly 64 + 64*B + 2*hashlen octets. 
+// +// A reader must validate that +// the signature matches, +// the version is supported, +// the hash function identifier is recognized, +// B is nonzero and a power of two, +// K is nonzero, +// log2(B) + 9*K does not exceed the hash length in bits, +// the padding is all zero, +// and the file size is exactly 64 + 64*B + 2*hashlen octets. +// +// # Binding and integrity +// +// The pack hash binds a filter to one pack; +// a reader trusts a filter only when the recorded pack hash +// matches the pack it accompanies. +// +// The checksum guards against corruption of the filter itself. +// Recomputing it reads the whole file and rehashes it as fsck. +// +// # Lookup +// +// A lookup against one filter proceeds as follows: +// +// 1. Let b be the unsigned integer encoded +// by the most significant log2(B) bits of the object ID. +// B is a power of two, so 0 <= b < B. +// 2. Select and read bucket b. +// 3. For each 0 <= i < K, +// take the i-th 9-bit field +// from the 9*K bits that follow the bucket-selecting bits, +// and let pi be the unsigned integer it encodes, +// so 0 <= pi < 512. +// Compute wi = pi >> 6 and bi = pi & 63, +// so wi identifies one of the eight 64-bit words in bucket b +// and bi identifies one bit within that word. +// Within each 64-bit word, +// bit index 0 is the most significant bit +// and bit index 63 is the least significant bit. +// Test whether bit bi is set in word wi of bucket b. +// +// If any test fails, +// the object ID is definitely not in the covered index. +// If all tests succeed, +// the object ID may be in it. +// Two of the K 9-bit fields can decode to the same pi, +// so an insertion may set fewer than K distinct bits; +// this only raises the false positive rate +// and never causes a false negative. +// +// # Worked example +// +// Let B = 1 << 15 = 32768 and K = 8. 
+// Then log2(B) = 15, +// so each lookup uses 15 bits to choose the bucket +// and 8*9 = 72 bits to choose the in-bucket positions, +// for a total of 87 bits taken from the object ID. +// A SHA-1 has 160 bits and a SHA-256 has 256 bits, +// so both leave ample headroom. +// +// # Security considerations +// +// Object IDs are public unkeyed hashes, +// so an adversary can mine packs +// whose object IDs share a chosen prefix +// to crowd objects into one bucket +// and fill its bits. +// In the worst case this renders some buckets useless, +// making the filter degrade to "may contain" for those buckets, +// but it never produces a false negative +// and is not a significant denial-of-service vector. +package bloom diff --git a/internal/format/packidx/bloom/lookup.go b/internal/format/packidx/bloom/lookup.go new file mode 100644 index 00000000..4ca32913 --- /dev/null +++ b/internal/format/packidx/bloom/lookup.go @@ -0,0 +1,42 @@ +package bloom + +import ( + "encoding/binary" +) + +// MayContain reports whether oid may be present +// in the index covered by the filter. +// +// oid must be exactly the filter's hash size; +// MayContain panics otherwise. +// +// Labels: Mut-No. +func (f *Bloom) MayContain(oid []byte) bool { + if len(oid) != f.objectFormat.Size() { + panic("internal/format/packidx/bloom: invalid object ID length") + } + + base := int(binary.BigEndian.Uint32(oid[:4])>>(32-f.log2B)) * BucketLen + + for i := range f.k { + word, mask := probe(oid, f.log2B, i) + if binary.BigEndian.Uint64(f.buckets[base+word*8:])&mask == 0 { + return false + } + } + + return true +} + +// probe returns the bucket word index and single-bit mask +// addressed by the i-th probe of oid. 
+func probe(oid []byte, log2B uint, i int) (word int, mask uint64) { + bitOff := log2B + fieldBits*uint(i) //#nosec G115 + byteOff := bitOff >> 3 + bitInByte := bitOff & 7 + + window := uint32(oid[byteOff])<<8 | uint32(oid[byteOff+1]) + pi := (window >> (16 - bitInByte - fieldBits)) & 0x1ff + + return int(pi >> 6), 1 << (wordBits - 1 - (pi & 63)) +} diff --git a/internal/format/packidx/bloom/lookup_test.go b/internal/format/packidx/bloom/lookup_test.go new file mode 100644 index 00000000..e6264f9a --- /dev/null +++ b/internal/format/packidx/bloom/lookup_test.go @@ -0,0 +1,32 @@ +package bloom_test + +import ( + "testing" + + "lindenii.org/go/furgit/internal/format/packidx/bloom" + "lindenii.org/go/furgit/object/id" +) + +func TestMayContainBadLength(t *testing.T) { + t.Parallel() + + format := id.ObjectFormatSHA256 + + builder, err := bloom.NewBuilder(format, 4, 2, make([]byte, format.Size())) + if err != nil { + t.Fatal(err) + } + + filter, err := bloom.Parse(builder.Bytes(), format) + if err != nil { + t.Fatal(err) + } + + defer func() { + if recover() == nil { + t.Fatal("MayContain did not panic on a short object ID") + } + }() + + filter.MayContain(make([]byte, format.Size()-1)) +} diff --git a/internal/format/packidx/bloom/roundtrip_test.go b/internal/format/packidx/bloom/roundtrip_test.go new file mode 100644 index 00000000..9f831538 --- /dev/null +++ b/internal/format/packidx/bloom/roundtrip_test.go @@ -0,0 +1,92 @@ +package bloom_test + +import ( + "bytes" + "encoding/binary" + "testing" + + "lindenii.org/go/furgit/internal/format/packidx/bloom" + "lindenii.org/go/furgit/object/id" +) + +func makeOID(size int, seed uint64) []byte { + out := make([]byte, size) + state := seed + + for i := 0; i < size; i += 8 { + state = state*6364136223846793005 + 1442695040888963407 + + var word [8]byte + + binary.BigEndian.PutUint64(word[:], state) + copy(out[i:], word[:]) + } + + return out +} + +func TestRoundTrip(t *testing.T) { + t.Parallel() + + for _, format := range 
id.SupportedObjectFormats() { + t.Run(format.String(), func(t *testing.T) { + t.Parallel() + + const objects = 10000 + + bucketCount, k, err := bloom.RecommendParams(format, objects) + if err != nil { + t.Fatal(err) + } + + size := format.Size() + packHash := makeOID(size, 0xC0FFEE) + + builder, err := bloom.NewBuilder(format, bucketCount, k, packHash) + if err != nil { + t.Fatal(err) + } + + for i := range objects { + builder.Add(makeOID(size, uint64(i))) //nolint:gosec + } + + filter, err := bloom.Parse(builder.Bytes(), format) + if err != nil { + t.Fatal(err) + } + + if !bytes.Equal(filter.PackHash(), packHash) { + t.Fatalf("PackHash = %x, want %x", filter.PackHash(), packHash) + } + + err = filter.Verify() + if err != nil { + t.Fatalf("Verify on a freshly built filter: %v", err) + } + + for i := range objects { + if !filter.MayContain(makeOID(size, uint64(i))) { //nolint:gosec + t.Fatalf("false negative for added object %d", i) + } + } + + const probes = 10000 + + falsePositives := 0 + + for i := range probes { + if filter.MayContain(makeOID(size, uint64(1)<<40+uint64(i))) { //nolint:gosec + falsePositives++ + } + } + + rate := float64(falsePositives) / float64(probes) + if rate > 0.05 { + t.Errorf("false positive rate %.4f exceeds 0.05", rate) + } + + t.Logf("B=%d K=%d false positive rate %.4f", bucketCount, k, rate) + }) + } +} diff --git a/internal/format/packidx/bloom/write.go b/internal/format/packidx/bloom/write.go new file mode 100644 index 00000000..e6213a2c --- /dev/null +++ b/internal/format/packidx/bloom/write.go @@ -0,0 +1,164 @@ +package bloom + +import ( + "encoding/binary" + "errors" + "fmt" + "hash" + "math/bits" + + "lindenii.org/go/furgit/object/id" + "lindenii.org/go/lgo/intconv" +) + +// ErrInvalidParameters reports that +// the parameters supplied for a filter build +// are not representable in the format. 
+var ErrInvalidParameters = errors.New("internal/format/packidx/bloom: invalid parameters") + +// defaultK is the probe count used by [RecommendParams]. +// +// With 512-bit buckets it keeps the false positive rate near one percent +// at the target bucket load. +const defaultK = 8 + +// targetLoad is the object count per bucket that [RecommendParams] aims for. +const targetLoad = 48 + +// Builder accumulates object IDs into an in-memory Bloom filter +// and serializes it. +// +// Labels: MT-Unsafe. +type Builder struct { + // data is the full filter file, header and trailer included. + data []byte + + // buckets aliases the bucket region of data, between header and trailer. + buckets []byte + + // hashImpl computes the trailing checksum and gives the hash size. + hashImpl hash.Hash + + log2B uint + k int +} + +// NewBuilder creates a filter builder +// for bucketCount buckets and k probes per object ID, +// binding the filter to packHash. +// +// bucketCount must be a nonzero power of two, +// k must be nonzero, +// and log2(bucketCount) + 9*k must not exceed the hash length in bits. +// packHash must be the pack's trailer hash; +// NewBuilder panics when its length does not match the object format. 
+func NewBuilder(objectFormat id.ObjectFormat, bucketCount uint32, k uint16, packHash []byte) (*Builder, error) { + hashID, err := hashFunctionID(objectFormat) + if err != nil { + return nil, err + } + + hashImpl, err := objectFormat.New() + if err != nil { + return nil, fmt.Errorf("internal/format/packidx/bloom: %w", err) + } + + hashSize := objectFormat.Size() + + if len(packHash) != hashSize { + panic("internal/format/packidx/bloom: invalid pack hash length") + } + + log2B, err := checkParams(bucketCount, k, hashSize) + if err != nil { + return nil, fmt.Errorf("%w: %w", ErrInvalidParameters, err) + } + + total, err := intconv.Uint64ToInt(uint64(HeaderLen) + uint64(BucketLen)*uint64(bucketCount) + 2*uint64(hashSize)) //#nosec G115 + if err != nil { + return nil, fmt.Errorf("%w: %w", ErrInvalidParameters, err) + } + + data := make([]byte, total) + binary.BigEndian.PutUint32(data[0:], signature) + binary.BigEndian.PutUint32(data[4:], version) + binary.BigEndian.PutUint32(data[8:], hashID) + binary.BigEndian.PutUint32(data[12:], bucketCount) + binary.BigEndian.PutUint16(data[16:], k) + + bucketsEnd := total - 2*hashSize + copy(data[bucketsEnd:], packHash) + + return &Builder{ + data: data, + buckets: data[HeaderLen:bucketsEnd], + hashImpl: hashImpl, + log2B: log2B, + k: int(k), + }, nil +} + +// Add records oid in the filter. +// +// oid must be exactly the filter's hash size; +// Add panics otherwise. +func (b *Builder) Add(oid []byte) { + if len(oid) != b.hashImpl.Size() { + panic("internal/format/packidx/bloom: invalid object ID length") + } + + base := int(binary.BigEndian.Uint32(oid[:4])>>(32-b.log2B)) * BucketLen + + for i := range b.k { + word, mask := probe(oid, b.log2B, i) + + off := base + word*8 + set := binary.BigEndian.Uint64(b.buckets[off:]) | mask + binary.BigEndian.PutUint64(b.buckets[off:], set) + } +} + +// Bytes returns the serialized filter, including its trailing checksum. +// +// Labels: Life-Parent, Mut-No. 
+func (b *Builder) Bytes() []byte { + checksumOff := len(b.data) - b.hashImpl.Size() + + b.hashImpl.Reset() + _, _ = b.hashImpl.Write(b.data[:checksumOff]) + b.hashImpl.Sum(b.data[checksumOff:checksumOff]) + + return b.data +} + +// RecommendParams returns filter parameters for an index of n objects, +// targeting a false positive rate near one percent. +func RecommendParams(objectFormat id.ObjectFormat, n int) (bucketCount uint32, k uint16, err error) { + hashSize := objectFormat.Size() + if hashSize == 0 { + return 0, 0, id.ErrInvalidObjectFormat + } + + const maxPow2 = uint32(1) << 31 + + wanted := uint64(0) + if n > 0 { + wanted = (uint64(n) + targetLoad - 1) / targetLoad + } + + switch { + case wanted <= 1: + bucketCount = 1 + case wanted > uint64(maxPow2): + bucketCount = maxPow2 + default: + bucketCount = uint32(1) << bits.Len64(wanted-1) + } + + _, err = checkParams(bucketCount, defaultK, hashSize) + if err != nil { + return 0, 0, fmt.Errorf("%w: %w", ErrInvalidParameters, err) + } + + return bucketCount, defaultK, nil +} diff --git a/internal/format/packidx/bloom/write_test.go b/internal/format/packidx/bloom/write_test.go new file mode 100644 index 00000000..74173921 --- /dev/null +++ b/internal/format/packidx/bloom/write_test.go @@ -0,0 +1,95 @@ +package bloom_test + +import ( + "errors" + "testing" + + "lindenii.org/go/furgit/internal/format/packidx/bloom" + "lindenii.org/go/furgit/object/id" +) + +func TestRecommendParams(t *testing.T) { + t.Parallel() + + for _, format := range id.SupportedObjectFormats() { + t.Run(format.String(), func(t *testing.T) { + t.Parallel() + + for _, n := range []int{0, 1, 1000, 10000, 1000000} { + bucketCount, k, err := bloom.RecommendParams(format, n) + if err != nil { + t.Fatalf("n=%d: %v", n, err) + } + + if bucketCount == 0 || bucketCount&(bucketCount-1) != 0 { + t.Errorf("n=%d: bucket count %d not a power of two", n, bucketCount) + } + + _, err = bloom.NewBuilder(format, bucketCount, k, make([]byte, format.Size())) + 
if err != nil { + t.Errorf("n=%d: recommended parameters rejected: %v", n, err) + } + } + }) + } +} + +func TestNewBuilderRejects(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + bucketCount uint32 + k uint16 + }{ + {"zero buckets", 0, 8}, + {"non power of two", 3, 8}, + {"zero probe count", 4, 0}, + } + + for _, format := range id.SupportedObjectFormats() { + t.Run(format.String(), func(t *testing.T) { + t.Parallel() + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + _, err := bloom.NewBuilder(format, tc.bucketCount, tc.k, make([]byte, format.Size())) + if !errors.Is(err, bloom.ErrInvalidParameters) { + t.Fatalf("error = %v, want ErrInvalidParameters", err) + } + }) + } + }) + } +} + +func TestNewBuilderBadPackHash(t *testing.T) { + t.Parallel() + + defer func() { + if recover() == nil { + t.Fatal("NewBuilder did not panic on a short pack hash") + } + }() + + _, _ = bloom.NewBuilder(id.ObjectFormatSHA256, 4, 2, make([]byte, id.ObjectFormatSHA256.Size()-1)) +} + +func TestAddBadLength(t *testing.T) { + t.Parallel() + + builder, err := bloom.NewBuilder(id.ObjectFormatSHA256, 4, 2, make([]byte, id.ObjectFormatSHA256.Size())) + if err != nil { + t.Fatal(err) + } + + defer func() { + if recover() == nil { + t.Fatal("Add did not panic on a short object ID") + } + }() + + builder.Add(make([]byte, id.ObjectFormatSHA256.Size()-1)) +} diff --git a/internal/format/packidx/lookup.go b/internal/format/packidx/lookup.go index d1293f47..71847f21 100644 --- a/internal/format/packidx/lookup.go +++ b/internal/format/packidx/lookup.go @@ -4,6 +4,7 @@ import ( "bytes" "encoding/binary" "fmt" + "math/bits" ) // Lookup searches the index for one object ID @@ -18,6 +19,53 @@ func (idx *Packidx) Lookup(oid []byte) (offset uint64, found bool, err error) { lo, hi := idx.fanoutRange(oid[0]) + // Object IDs are uniform for honest inputs, + // interp on next 8 octets converges in O(log log n). 
+ // + // OIDs are public unkeyed hashes, + // so an attacker may sample/filter/mine prefix clusters, + // making interpolation mis-estimate every probe. + // See https://runxiyu.org/comp/ch4ht/ + // for why cryptographic hash algorithms are insufficient. + // + // Cap the interp at bisect's probe count and finish by bisect; + // adversarial at O(log n) plus small interpolation overhead, + // honest exit well under the cap. + target := binary.BigEndian.Uint64(oid[1:9]) + + for budget := bits.Len(uint(hi - lo)); hi-lo > 8 && budget > 0; budget-- { //nolint:gosec + loKey := binary.BigEndian.Uint64(idx.OIDAt(lo)[1:9]) + hiKey := binary.BigEndian.Uint64(idx.OIDAt(hi - 1)[1:9]) + + var mid int + + switch { + case target <= loKey: + mid = lo + case target >= hiKey: + mid = hi - 1 + default: + hi128, lo128 := bits.Mul64(target-loKey, uint64(hi-lo-1)) //#nosec G115 + q, _ := bits.Div64(hi128, lo128, hiKey-loKey) + mid = lo + int(q) //#nosec G115 + } + + switch cmp := bytes.Compare(oid, idx.OIDAt(mid)); { + case cmp == 0: + offset, err = idx.OffsetAt(mid) + if err != nil { + return 0, false, err + } + + return offset, true, nil + case cmp < 0: + hi = mid + default: + lo = mid + 1 + } + } + + // Interpolation narrowed or capped; bisect to finish. 
for lo < hi { mid := lo + (hi-lo)/2 diff --git a/internal/format/packidx/write.go b/internal/format/packidx/write.go index d3f22c83..35b2805f 100644 --- a/internal/format/packidx/write.go +++ b/internal/format/packidx/write.go @@ -84,7 +84,7 @@ func Write(w io.Writer, objectFormat id.ObjectFormat, entries []Entry, packHash sw.PutUint32(entries[i].CRC32) } - var largeOffsets []uint64 + largeOffsets := make([]uint64, 0, len(entries)) for i := range entries { offset := entries[i].Offset diff --git a/internal/format/packidx/write_test.go b/internal/format/packidx/write_test.go index 68df3ece..866fa274 100644 --- a/internal/format/packidx/write_test.go +++ b/internal/format/packidx/write_test.go @@ -16,8 +16,8 @@ func syntheticEntries(n int) []packidx.Entry { for i := range entries { entries[i].OID[0] = byte(i * 7) entries[i].OID[1] = byte(i + 1) - entries[i].Offset = uint64(i+1) * 100 - entries[i].CRC32 = uint32(i+1) * 0x01010101 + entries[i].Offset = uint64(i+1) * 100 //#nosec G115 + entries[i].CRC32 = uint32(i+1) * 0x01010101 //#nosec G115 } return entries diff --git a/internal/mru/order.go b/internal/mru/order.go index 76b58497..1ea1ac09 100644 --- a/internal/mru/order.go +++ b/internal/mru/order.go @@ -22,11 +22,33 @@ import ( type Order[K comparable] struct { snapshot atomic.Pointer[[]K] mu sync.Mutex + + interval uint64 + pending atomic.Uint64 +} + +// Options configures a new Order. +type Options struct { + // Interval applies a reorder at most once per Interval + // eligible (non-front, member) Touch calls. + // + // A larger Interval decreases recency precision + // but uses fewer allocations. + // Each applied reorder allocates one snapshot, + // so throttling decreases the snapshot-allocation rate + // by roughly Interval. + // + // An Interval of 1 reorders on every eligible Touch. + Interval uint64 } -// New returns a new, empty order. 
-func New[K comparable]() *Order[K] { - return &Order[K]{} //nolint:exhaustruct +// New returns a new, empty order configured by opts. +func New[K comparable](opts Options) *Order[K] { + if opts.Interval == 0 { + panic("internal/mru: Options.Interval must be at least 1") + } + + return &Order[K]{interval: opts.Interval} //nolint:exhaustruct } // Len returns the number of keys in the order. diff --git a/internal/mru/order_test.go b/internal/mru/order_test.go index bf0d4f2e..2f16eac3 100644 --- a/internal/mru/order_test.go +++ b/internal/mru/order_test.go @@ -20,7 +20,7 @@ func set(keys ...string) map[string]struct{} { func TestTouchMovesToFront(t *testing.T) { t.Parallel() - order := mru.New[string]() + order := mru.New[string](mru.Options{Interval: 1}) order.Sync(set("a", "b", "c")) order.Touch("a") @@ -36,10 +36,60 @@ func TestTouchMovesToFront(t *testing.T) { } } +func TestIntervalThrottlesReorder(t *testing.T) { + t.Parallel() + + const interval = 4 + + order := mru.New[string](mru.Options{Interval: interval}) + order.Sync(set("a", "b", "c")) + + front := order.Keys()[0] + + other := "a" + if other == front { + other = "b" + } + + for range interval - 1 { + order.Touch(other) + + if got := order.Keys()[0]; got != front { + t.Fatalf("reordered early: front = %q, want %q", got, front) + } + } + + order.Touch(other) + + if got := order.Keys()[0]; got != other { + t.Fatalf("after interval touches, front = %q, want %q", got, other) + } +} + +func TestIntervalKeepsMembershipUnderReorder(t *testing.T) { + t.Parallel() + + order := mru.New[string](mru.Options{Interval: 8}) + order.Sync(set("a", "b", "c", "d")) + + for range 100 { + for _, key := range []string{"a", "b", "c", "d"} { + order.Touch(key) + } + } + + got := slices.Clone(order.Keys()) + slices.Sort(got) + + if want := []string{"a", "b", "c", "d"}; !slices.Equal(got, want) { + t.Fatalf("membership corrupted: %v", got) + } +} + func TestSyncDropsAbsentAndKeepsSurvivorOrder(t *testing.T) { t.Parallel() - order 
:= mru.New[string]() + order := mru.New[string](mru.Options{Interval: 1}) order.Sync(set("a", "b", "c")) // Establish a deterministic recency order: c, b, a. @@ -61,7 +111,7 @@ func TestSyncDropsAbsentAndKeepsSurvivorOrder(t *testing.T) { func TestSyncPlacesNewKeysFirst(t *testing.T) { t.Parallel() - order := mru.New[string]() + order := mru.New[string](mru.Options{Interval: 1}) order.Sync(set("a", "b")) order.Touch("a") @@ -77,7 +127,7 @@ func TestSyncPlacesNewKeysFirst(t *testing.T) { func TestTouchAbsentIsNoOp(t *testing.T) { t.Parallel() - order := mru.New[string]() + order := mru.New[string](mru.Options{Interval: 1}) order.Sync(set("a", "b")) order.Touch("a") order.Touch("z") @@ -90,7 +140,7 @@ func TestTouchAbsentIsNoOp(t *testing.T) { func TestKeysIsConsistentSnapshot(t *testing.T) { t.Parallel() - order := mru.New[string]() + order := mru.New[string](mru.Options{Interval: 1}) order.Sync(set("a", "b")) snapshot := order.Keys() @@ -111,7 +161,7 @@ func TestKeysIsConsistentSnapshot(t *testing.T) { func TestConcurrentTouchAndKeys(t *testing.T) { t.Parallel() - order := mru.New[string]() + order := mru.New[string](mru.Options{Interval: 1}) order.Sync(set("a", "b", "c", "d")) var wg sync.WaitGroup diff --git a/internal/mru/touch.go b/internal/mru/touch.go index 67ac6706..420bbc4a 100644 --- a/internal/mru/touch.go +++ b/internal/mru/touch.go @@ -10,12 +10,21 @@ package mru // A contended attempt, // or a key that is not a member, // leaves the order unchanged. +// +// When the order has a reorder interval above 1, +// an eligible (non-front) Touch records its recency +// but applies the reorder only once per interval such calls; +// the recording itself is lock-free and allocation-free. 
func (order *Order[K]) Touch(key K) { keys := order.Keys() if len(keys) == 0 || keys[0] == key { return } + if order.interval > 1 && order.pending.Add(1)%order.interval != 0 { + return + } + if !order.mu.TryLock() { return } diff --git a/internal/progress/meter.go b/internal/progress/meter.go index e5e64fb4..9d4f1155 100644 --- a/internal/progress/meter.go +++ b/internal/progress/meter.go @@ -1,17 +1,22 @@ package progress import ( + "sync/atomic" "time" "lindenii.org/go/lgo/iowrap" ) const ( - updateInterval = time.Second + renderInterval = 100 * time.Millisecond + forceInterval = time.Second throughputInterval = 500 * time.Millisecond ) // Meter renders one in-place progress line. +// +// Add is safe for concurrent use; a single background goroutine renders. +// Stop must be called exactly once to flush the final line and release it. type Meter struct { writer iowrap.WriteFlusher @@ -21,24 +26,29 @@ type Meter struct { sparse bool throughput bool - startedAt time.Time - nextUpdateAt time.Time - nextThroughput time.Time + done atomic.Int64 + bytes atomic.Int64 + sawValue atomic.Bool - lastDone int - lastBytes int - lastPercent int - lastCounterW int - sawValue bool + startedAt time.Time + stop chan struct{} + exited chan struct{} + + // The following are owned by the render goroutine while it runs, + // then by Stop once exited is closed. + nextForceAt time.Time + nextThroughput time.Time + lastPercent int + lastCounterW int throughputSuffix string } -// New creates one progress meter. +// New creates one progress meter and starts its render goroutine. 
func New(opts Options) *Meter { now := time.Now() - return &Meter{ + meter := &Meter{ writer: opts.Writer, title: opts.Title, total: opts.Total, @@ -46,10 +56,20 @@ func New(opts Options) *Meter { sparse: opts.Sparse, throughput: opts.Throughput, startedAt: now, - nextUpdateAt: now.Add(updateInterval), + stop: make(chan struct{}), + exited: make(chan struct{}), + nextForceAt: now.Add(forceInterval), nextThroughput: now.Add(throughputInterval), lastPercent: -1, } + + if meter.writer != nil { + go meter.loop() + } else { + close(meter.exited) + } + + return meter } // Options configures one progress meter. @@ -67,59 +87,71 @@ type Options struct { Throughput bool } -// Set records current progress -// and renders when percent changed or the 1s tick elapsed. -func (meter *Meter) Set(done int, bytes int) { - meter.lastDone = done - meter.lastBytes = bytes - meter.sawValue = true +// Add increments the done and byte counters. +// +// Labels: MT-Safe. +func (meter *Meter) Add(done, bytes int64) { + meter.done.Add(done) + meter.bytes.Add(bytes) + meter.sawValue.Store(true) +} + +// Stop ends the render goroutine, forces the final line, and appends ", <msg>.". +func (meter *Meter) Stop(msg string) { + close(meter.stop) + <-meter.exited - if meter.writer == nil { + if !meter.sawValue.Load() || meter.writer == nil { return } - now := time.Now() - forced := meter.consumeUpdateTick(now) - - percentChanged := false - - if meter.total > 0 { - percent := int(int64(done) * 100 / int64(meter.total)) - percentChanged = percent != meter.lastPercent + if msg == "" { + msg = "done" } - if !percentChanged && !forced { - return + if meter.sparse && meter.total > 0 && int(meter.done.Load()) != meter.total { + meter.done.Store(int64(meter.total)) } - meter.render(now, "\r") + meter.render(time.Now(), ", "+msg+".\n") } -// Stop forces the final progress line and appends ", <msg>.". 
-func (meter *Meter) Stop(msg string) { - if !meter.sawValue || meter.writer == nil { - return - } +func (meter *Meter) loop() { + defer close(meter.exited) - if msg == "" { - msg = "done" + ticker := time.NewTicker(renderInterval) + defer ticker.Stop() + + for { + select { + case <-meter.stop: + return + case now := <-ticker.C: + meter.maybeRender(now) + } } +} - if meter.sparse && meter.total > 0 && meter.lastDone != meter.total { - meter.lastDone = meter.total +func (meter *Meter) maybeRender(now time.Time) { + if !meter.sawValue.Load() { + return } - meter.render(time.Now(), ", "+msg+".\n") -} + forced := false -func (meter *Meter) consumeUpdateTick(now time.Time) bool { - if now.Before(meter.nextUpdateAt) { - return false + for !now.Before(meter.nextForceAt) { + meter.nextForceAt = meter.nextForceAt.Add(forceInterval) + forced = true } - for !now.Before(meter.nextUpdateAt) { - meter.nextUpdateAt = meter.nextUpdateAt.Add(updateInterval) + percentChanged := false + + if meter.total > 0 { + percent := int(meter.done.Load() * 100 / int64(meter.total)) + percentChanged = percent != meter.lastPercent } - return true + if percentChanged || forced { + meter.render(now, "\r") + } } diff --git a/internal/progress/meter_test.go b/internal/progress/meter_test.go new file mode 100644 index 00000000..8fa09973 --- /dev/null +++ b/internal/progress/meter_test.go @@ -0,0 +1,43 @@ +package progress_test + +import ( + "bytes" + "strings" + "sync" + "testing" + "time" + + "lindenii.org/go/furgit/internal/progress" + "lindenii.org/go/lgo/iowrap" +) + +func TestMeterConcurrentAdd(t *testing.T) { + t.Parallel() + + var buf bytes.Buffer + + meter := progress.New(progress.Options{ + Writer: iowrap.NopFlush(&buf), + Title: "test", + Total: 1000, + }) + + var wg sync.WaitGroup + + for range 10 { + wg.Go(func() { + for range 100 { + meter.Add(1, 0) + time.Sleep(time.Millisecond) + } + }) + } + + wg.Wait() + + meter.Stop("done") + + if got := buf.String(); !strings.Contains(got, "100% 
(1000/1000)") { + t.Fatalf("final line = %q, want it to contain %q", got, "100% (1000/1000)") + } +} diff --git a/internal/progress/render.go b/internal/progress/render.go index 814ced98..a67d40ff 100644 --- a/internal/progress/render.go +++ b/internal/progress/render.go @@ -40,13 +40,15 @@ func (meter *Meter) render(now time.Time, eol string) { } func (meter *Meter) renderCounters() string { + done := meter.done.Load() + if meter.total > 0 { - meter.lastPercent = int(int64(meter.lastDone) * 100 / int64(meter.total)) + meter.lastPercent = int(done * 100 / int64(meter.total)) - return fmt.Sprintf("%3d%% (%d/%d)%s", meter.lastPercent, meter.lastDone, meter.total, meter.throughputSuffix) + return fmt.Sprintf("%3d%% (%d/%d)%s", meter.lastPercent, done, meter.total, meter.throughputSuffix) } - return fmt.Sprintf("%d%s", meter.lastDone, meter.throughputSuffix) + return fmt.Sprintf("%d%s", done, meter.throughputSuffix) } func (meter *Meter) refreshThroughput(now time.Time) { @@ -67,6 +69,7 @@ func (meter *Meter) refreshThroughput(now time.Time) { return } - rate := uint64(float64(meter.lastBytes) / elapsed.Seconds()) - meter.throughputSuffix = ", " + humanize.Bytes(uint64(meter.lastBytes)) + " | " + humanize.Bytes(rate) + "/s" //nolint:gosec + bytes := meter.bytes.Load() + rate := uint64(float64(bytes) / elapsed.Seconds()) + meter.throughputSuffix = ", " + humanize.Bytes(uint64(bytes)) + " | " + humanize.Bytes(rate) + "/s" //#nosec G115 } diff --git a/internal/testgit/command.go b/internal/testgit/command.go index db874bd1..4fc8ab17 100644 --- a/internal/testgit/command.go +++ b/internal/testgit/command.go @@ -15,7 +15,7 @@ func (repo *Repo) command( ) *exec.Cmd { tb.Helper() - cmd := exec.CommandContext(tb.Context(), command, args...) //nolint:gosec // Test helper runs caller-selected commands. + cmd := exec.CommandContext(tb.Context(), command, args...) 
cmd.Dir = repo.path cmd.Env = repo.env diff --git a/internal/testgit/tree.go b/internal/testgit/tree.go index 501c7949..ff9b1918 100644 --- a/internal/testgit/tree.go +++ b/internal/testgit/tree.go @@ -50,7 +50,7 @@ func (repo *Repo) LsTree(tb testing.TB, oid id.ObjectID) ([]TreeEntry, error) { return nil, fmt.Errorf("ls-tree: %w", err) } - var entries []TreeEntry + entries := make([]TreeEntry, 0, bytes.Count(stdout, []byte{0})) for record := range bytes.SplitSeq(stdout, []byte{0}) { if len(record) == 0 { diff --git a/object/blob/clone.go b/object/blob/clone.go new file mode 100644 index 00000000..7106c3aa --- /dev/null +++ b/object/blob/clone.go @@ -0,0 +1,11 @@ +package blob + +import "bytes" + +// Clone returns a deep copy of the blob +// whose Data is independent of any memory the original may alias. +// +// Labels: Life-Independent. +func (blob *Blob) Clone() *Blob { + return &Blob{Data: bytes.Clone(blob.Data)} +} diff --git a/object/blob/parse.go b/object/blob/parse.go index c013af96..1796d42f 100644 --- a/object/blob/parse.go +++ b/object/blob/parse.go @@ -2,7 +2,13 @@ package blob // Parse decodes a blob object body. // -// Labels: Life-Independent. +// The returned blob aliases body: +// its Data shares the same backing array, +// so the blob inherits body's lifetime +// and must not be mutated unless body may be. +// Use [Blob.Clone] for an independent copy. +// +// Labels: Life-Parent, Mut-No. func Parse(body []byte) (*Blob, error) { - return &Blob{Data: append([]byte(nil), body...)}, nil + return &Blob{Data: body}, nil } diff --git a/object/commit/append.go b/object/commit/append.go index d5258b97..02d69058 100644 --- a/object/commit/append.go +++ b/object/commit/append.go @@ -33,7 +33,7 @@ func (commit *Commit) AppendWithoutHeader(dst []byte) ([]byte, error) { dst = append(dst, byte('\n')) - if commit.ChangeID != "" { + if len(commit.ChangeID) != 0 { dst = append(dst, []byte("change-id ")...) dst = append(dst, commit.ChangeID...) 
dst = append(dst, byte('\n')) @@ -41,7 +41,7 @@ func (commit *Commit) AppendWithoutHeader(dst []byte) ([]byte, error) { for _, h := range commit.ExtraHeaders { // GIGO on empty keys and such. - dst = append(dst, []byte(h.Key)...) + dst = append(dst, h.Key...) dst = append(dst, byte(' ')) dst = append(dst, h.Value...) dst = append(dst, byte('\n')) diff --git a/object/commit/clone.go b/object/commit/clone.go new file mode 100644 index 00000000..08987f26 --- /dev/null +++ b/object/commit/clone.go @@ -0,0 +1,33 @@ +package commit + +import ( + "bytes" + "slices" +) + +// Clone returns a deep copy of the commit +// whose byte fields are independent of any memory the original may alias. +// +// Labels: Life-Independent. +func (commit *Commit) Clone() *Commit { + clone := &Commit{ + Tree: commit.Tree, + Parents: slices.Clone(commit.Parents), + Author: commit.Author.Clone(), + Committer: commit.Committer.Clone(), + Message: bytes.Clone(commit.Message), + ChangeID: bytes.Clone(commit.ChangeID), + } + + if commit.ExtraHeaders != nil { + clone.ExtraHeaders = make([]ExtraHeader, len(commit.ExtraHeaders)) + for i, h := range commit.ExtraHeaders { + clone.ExtraHeaders[i] = ExtraHeader{ + Key: bytes.Clone(h.Key), + Value: bytes.Clone(h.Value), + } + } + } + + return clone +} diff --git a/object/commit/commit.go b/object/commit/commit.go index 6a89bce9..a8a247bf 100644 --- a/object/commit/commit.go +++ b/object/commit/commit.go @@ -14,12 +14,12 @@ type Commit struct { Author signature.Signature Committer signature.Signature Message []byte - ChangeID string + ChangeID []byte ExtraHeaders []ExtraHeader } // ExtraHeader represents an extra header in a Git object. 
type ExtraHeader struct { - Key string + Key []byte Value []byte } diff --git a/object/commit/parse.go b/object/commit/parse.go index 20353e14..74f607f1 100644 --- a/object/commit/parse.go +++ b/object/commit/parse.go @@ -13,6 +13,16 @@ import ( var ErrInvalidCommit = errors.New("object/commit: invalid commit") // Parse decodes a commit object body. +// +// The returned commit aliases body: +// its Message, ChangeID, and extra-header fields, +// along with the byte fields of its signatures, +// share body's backing array. +// The commit inherits body's lifetime +// and must not be mutated unless body may be. +// Use [Commit.Clone] for an independent copy. +// +// Labels: Life-Parent, Mut-No. func Parse(body []byte, objectFormat id.ObjectFormat) (*Commit, error) { c := new(Commit) @@ -95,7 +105,7 @@ func Parse(body []byte, objectFormat id.ObjectFormat) (*Commit, error) { return nil, fmt.Errorf("%w: unexpected change-id header at offset %d", ErrInvalidCommit, lineStart) } - c.ChangeID = string(value) + c.ChangeID = value case "gpgsig", "gpgsig-sha256": if state != parseStateExtra { return nil, fmt.Errorf("%w: unexpected %s header at offset %d", ErrInvalidCommit, key, lineStart) @@ -119,8 +129,8 @@ func Parse(body []byte, objectFormat id.ObjectFormat) (*Commit, error) { } c.ExtraHeaders = append(c.ExtraHeaders, ExtraHeader{ - Key: string(key), - Value: append([]byte(nil), value...), + Key: key, + Value: value, }) } } @@ -141,7 +151,7 @@ func Parse(body []byte, objectFormat id.ObjectFormat) (*Commit, error) { panic("unreachable parse state") } - c.Message = append([]byte(nil), body[i:]...) 
+ c.Message = body[i:] return c, nil } diff --git a/object/commit/roundtrip_test.go b/object/commit/roundtrip_test.go index faa8a834..1cfcaaac 100644 --- a/object/commit/roundtrip_test.go +++ b/object/commit/roundtrip_test.go @@ -87,10 +87,10 @@ func TestRoundTrip(t *testing.T) { OffsetMinutes: 330, }, Message: []byte("roundtrip subject\n\nroundtrip body\n\n"), - ChangeID: "zyxwvutsrqponmlk", + ChangeID: []byte("zyxwvutsrqponmlk"), ExtraHeaders: []commit.ExtraHeader{ - {Key: "encoding", Value: []byte("UTF-8")}, - {Key: "x-test-header", Value: []byte("value")}, + {Key: []byte("encoding"), Value: []byte("UTF-8")}, + {Key: []byte("x-test-header"), Value: []byte("value")}, }, } @@ -145,7 +145,7 @@ func assertCommitEqual(t *testing.T, got *commit.Commit, want *commit.Commit) { t.Fatalf("message = %q, want %q", got.Message, want.Message) } - if got.ChangeID != want.ChangeID { + if !bytes.Equal(got.ChangeID, want.ChangeID) { t.Fatalf("change id = %q, want %q", got.ChangeID, want.ChangeID) } @@ -155,7 +155,7 @@ func assertCommitEqual(t *testing.T, got *commit.Commit, want *commit.Commit) { for i, wantHeader := range want.ExtraHeaders { gotHeader := got.ExtraHeaders[i] - if gotHeader.Key != wantHeader.Key { + if !bytes.Equal(gotHeader.Key, wantHeader.Key) { t.Fatalf("extra header[%d] key = %q, want %q", i, gotHeader.Key, wantHeader.Key) } diff --git a/object/fetch/path.go b/object/fetch/path.go index f8eca507..e8b12481 100644 --- a/object/fetch/path.go +++ b/object/fetch/path.go @@ -47,7 +47,7 @@ func (err *PathNotTreeError) Error() string { // for an io/fs.FS-like interface. // // Labels: Life-Parent. 
-func (fetcher *Fetcher) Path(root oid.ObjectID, parts []string) (tree.Entry, error) { +func (fetcher *Fetcher) Path(root oid.ObjectID, parts [][]byte) (tree.Entry, error) { if len(parts) == 0 { return tree.Entry{}, ErrPathInvalid } diff --git a/object/fetch/treefs.go b/object/fetch/treefs.go index 9d88abb2..d12e3dd6 100644 --- a/object/fetch/treefs.go +++ b/object/fetch/treefs.go @@ -1,11 +1,11 @@ package fetch import ( + "bytes" "errors" "fmt" "io" "io/fs" - "strings" "time" oid "lindenii.org/go/furgit/object/id" @@ -48,12 +48,12 @@ var ErrGitlinkNotFile = fmt.Errorf("%w: object/fetch: gitlink entries are not re // generic fs consumers classify it correctly. var ErrIsDirectory = fmt.Errorf("%w: object/fetch: is a directory", fs.ErrInvalid) -func splitPath(path string) []string { +func splitPath(path string) [][]byte { if len(path) == 0 { return nil } - return strings.Split(path, "/") + return bytes.Split([]byte(path), []byte("/")) } type treeEntryValue struct { @@ -197,7 +197,7 @@ func (treeFS *TreeFS) Open(name string) (fs.File, error) { entries := make([]fs.DirEntry, 0, len(tree.Object().Entries())) for _, child := range tree.Object().Entries() { childEntry := treeEntryValue{ - name: child.Name, + name: string(child.Name), mode: child.Mode, objectID: child.ID, treeEntry: &child, @@ -401,7 +401,7 @@ func (treeFS *TreeFS) resolvePath(op treeFSOp, name string) (treeEntryValue, err } return treeEntryValue{ - name: entry.Name, + name: string(entry.Name), mode: entry.Mode, objectID: entry.ID, treeEntry: &entry, diff --git a/object/fetch/treefs_test.go b/object/fetch/treefs_test.go index ba292276..05240823 100644 --- a/object/fetch/treefs_test.go +++ b/object/fetch/treefs_test.go @@ -35,12 +35,12 @@ func TestTreeFS(t *testing.T) { } subTreeID := writeTree(t, store, []tree.Entry{ - {Mode: mode.Executable, Name: "exec.sh", ID: execID}, + {Mode: mode.Executable, Name: []byte("exec.sh"), ID: execID}, }) rootTreeID := writeTree(t, store, []tree.Entry{ - {Mode: 
mode.Regular, Name: "plain.txt", ID: plainID}, - {Mode: mode.Directory, Name: "dir", ID: subTreeID}, + {Mode: mode.Regular, Name: []byte("plain.txt"), ID: plainID}, + {Mode: mode.Directory, Name: []byte("dir"), ID: subTreeID}, }) commitID := writeCommit(t, store, rootTreeID) diff --git a/object/parse.go b/object/parse.go index f9779171..2a6c629f 100644 --- a/object/parse.go +++ b/object/parse.go @@ -20,8 +20,6 @@ var ErrSizeMismatch = errors.New("object: size mismatch") // ParseWithHeader parses a loose object // in "type size\x00body" format. -// -//nolint:ireturn func ParseWithHeader(raw []byte, objectFormat id.ObjectFormat) (Object, error) { ty, size, headerLen, err := header.Parse(raw) if err != nil { @@ -37,8 +35,6 @@ func ParseWithHeader(raw []byte, objectFormat id.ObjectFormat) (Object, error) { } // ParseWithoutHeader parses a typed object body. -// -//nolint:ireturn func ParseWithoutHeader(ty typ.Type, body []byte, objectFormat id.ObjectFormat) (Object, error) { switch ty { case typ.Blob: diff --git a/object/signature/clone.go b/object/signature/clone.go new file mode 100644 index 00000000..4637a258 --- /dev/null +++ b/object/signature/clone.go @@ -0,0 +1,16 @@ +package signature + +import "bytes" + +// Clone returns a deep copy of the signature +// whose Name and Email are independent of any memory the original may alias. +// +// Labels: Life-Independent. +func (signature Signature) Clone() Signature { + return Signature{ + Name: bytes.Clone(signature.Name), + Email: bytes.Clone(signature.Email), + WhenUnix: signature.WhenUnix, + OffsetMinutes: signature.OffsetMinutes, + } +} diff --git a/object/signature/parse.go b/object/signature/parse.go index b39100cd..190d2cf4 100644 --- a/object/signature/parse.go +++ b/object/signature/parse.go @@ -10,7 +10,13 @@ import ( // Parse parses a canonical Git signature line. // -// Labels: Life-Independent. 
+// The returned signature aliases line: +// its Name and Email share line's backing array, +// so the signature inherits line's lifetime +// and must not be mutated unless line may be. +// Use [Signature.Clone] for an independent copy. +// +// Labels: Life-Parent, Mut-No. func Parse(line []byte) (*Signature, error) { lt := bytes.IndexByte(line, '<') if lt < 0 { @@ -24,8 +30,8 @@ func Parse(line []byte) (*Signature, error) { gt := lt + 1 + gtRel - nameBytes := append([]byte(nil), bytes.TrimRight(line[:lt], " ")...) - emailBytes := append([]byte(nil), line[lt+1:gt]...) + nameBytes := bytes.TrimRight(line[:lt], " ") + emailBytes := line[lt+1 : gt] rest := line[gt+1:] if len(rest) == 0 || rest[0] != ' ' { diff --git a/object/store/dual/quarantine.go b/object/store/dual/quarantine.go index b73e48fe..6052c134 100644 --- a/object/store/dual/quarantine.go +++ b/object/store/dual/quarantine.go @@ -1,6 +1,7 @@ package dual import ( + "context" "errors" "fmt" "io" @@ -12,15 +13,11 @@ import ( ) // BeginObjectQuarantine begins an object-wise quarantine on the object side. -// -//nolint:ireturn func (dual *Dual) BeginObjectQuarantine(opts store.ObjectQuarantineOptions) (store.ObjectQuarantine, error) { return dual.object.BeginObjectQuarantine(opts) //nolint:wrapcheck } // BeginPackQuarantine begins a pack-wise quarantine on the pack side. -// -//nolint:ireturn func (dual *Dual) BeginPackQuarantine(opts store.PackQuarantineOptions) (store.PackQuarantine, error) { return dual.pack.BeginPackQuarantine(opts) //nolint:wrapcheck } @@ -29,8 +26,6 @@ func (dual *Dual) BeginPackQuarantine(opts store.PackQuarantineOptions) (store.P // // If the pack side fails to begin, // the already-begun object side is discarded before returning. 
-// -//nolint:ireturn func (dual *Dual) BeginCoordinatedQuarantine(opts store.CoordinatedQuarantineOptions) (store.CoordinatedQuarantine, error) { objectQ, err := dual.object.BeginObjectQuarantine(opts.Object) if err != nil { @@ -111,8 +106,8 @@ func (quarantine *coordinatedQuarantine) WriteReaderContent(ty typ.Type, size in return quarantine.objectQ.WriteReaderContent(ty, size, src) //nolint:wrapcheck } -func (quarantine *coordinatedQuarantine) WritePack(src io.Reader, opts store.PackWriteOptions) error { - return quarantine.packQ.WritePack(src, opts) //nolint:wrapcheck +func (quarantine *coordinatedQuarantine) WritePack(ctx context.Context, src io.Reader, opts store.PackWriteOptions) error { + return quarantine.packQ.WritePack(ctx, src, opts) //nolint:wrapcheck } // Promote publishes both halves and joins their errors. diff --git a/object/store/dual/writer.go b/object/store/dual/writer.go index f75f49e1..fb59adbe 100644 --- a/object/store/dual/writer.go +++ b/object/store/dual/writer.go @@ -1,6 +1,7 @@ package dual import ( + "context" "io" "lindenii.org/go/furgit/object/id" @@ -29,6 +30,6 @@ func (dual *Dual) WriteReaderContent(ty typ.Type, size int, src io.Reader) (id.O } // WritePack ingests one pack stream into the pack side. -func (dual *Dual) WritePack(src io.Reader, opts store.PackWriteOptions) error { - return dual.pack.WritePack(src, opts) //nolint:wrapcheck +func (dual *Dual) WritePack(ctx context.Context, src io.Reader, opts store.PackWriteOptions) error { + return dual.pack.WritePack(ctx, src, opts) //nolint:wrapcheck } diff --git a/object/store/loose/quarantine.go b/object/store/loose/quarantine.go index 214f7219..cd337670 100644 --- a/object/store/loose/quarantine.go +++ b/object/store/loose/quarantine.go @@ -30,7 +30,7 @@ type objectQuarantine struct { // beneath the destination loose root. // // Labels: Deps-Borrowed, Life-Parent, Close-No. 
-func (loose *Loose) BeginObjectQuarantine(_ store.ObjectQuarantineOptions) (store.ObjectQuarantine, error) { //nolint:ireturn +func (loose *Loose) BeginObjectQuarantine(_ store.ObjectQuarantineOptions) (store.ObjectQuarantine, error) { tempName, tempRoot, err := createLooseQuarantineRoot(loose.root) if err != nil { return nil, err diff --git a/object/store/memory/reader.go b/object/store/memory/reader.go index 6b8fae55..e04ad759 100644 --- a/object/store/memory/reader.go +++ b/object/store/memory/reader.go @@ -24,13 +24,15 @@ func (memory *Memory) ReadBytesFull(id id.ObjectID) ([]byte, error) { } // ReadBytesContent reads one object body. +// +// The returned slice aliases the store's own copy of the object content. func (memory *Memory) ReadBytesContent(id id.ObjectID) (typ.Type, []byte, error) { obj, ok := memory.objects.Load(id) if !ok { return typ.Unknown, nil, store.ErrObjectNotFound } - return obj.ty, append([]byte(nil), obj.content...), nil + return obj.ty, obj.content, nil } // ReadHeader reads one object header. 
diff --git a/object/store/mix/mix.go b/object/store/mix/mix.go index 2e8e926b..b048fe86 100644 --- a/object/store/mix/mix.go +++ b/object/store/mix/mix.go @@ -28,7 +28,7 @@ func New(backends ...store.ObjectReader) *Mix { present[backend] = struct{}{} } - order := mru.New[store.ObjectReader]() + order := mru.New[store.ObjectReader](mru.Options{Interval: 48}) order.Sync(present) return &Mix{ diff --git a/object/store/packed/delta.go b/object/store/packed/delta.go index 567fd679..5b538221 100644 --- a/object/store/packed/delta.go +++ b/object/store/packed/delta.go @@ -1,7 +1,6 @@ package packed import ( - "bytes" "fmt" "io" "slices" @@ -11,8 +10,14 @@ import ( "lindenii.org/go/furgit/internal/format/packfile" "lindenii.org/go/furgit/internal/format/packfile/delta" "lindenii.org/go/lgo/intconv" + "lindenii.org/go/lgo/sync" ) +//nolint:gochecknoglobals +var deltaHeaderPool = sync.NewPool(func() *[delta.MaxHeaderSizesLen]byte { + return new([delta.MaxHeaderSizesLen]byte) +}) + // deltaNode is a delta entry on a resolution chain. type deltaNode struct { // payload is the entry's compressed delta payload view. @@ -28,7 +33,11 @@ type deltaNode struct { // unpackEntry reconstructs the object stored at offset in p, // following ref- and ofs-delta chains within the pack. // -// Labels: Life-Independent. +// A direct base-cache hit returns the shared cache buffer itself, +// so the result may alias cache storage and must not be mutated; +// delta-applied results are freshly allocated. +// +// Labels: Life-Parent, Mut-No. func (packed *Packed) unpackEntry(p *pack, offset int) (packfile.EntryType, []byte, error) { var zero packfile.EntryType @@ -86,9 +95,11 @@ func (packed *Packed) unpackEntry(p *pack, offset int) (packfile.EntryType, []by cur = baseOffset } - // A direct cache hit with no deltas to apply must be copied. + // A direct cache hit with no deltas to apply + // returns the shared cache buffer directly; + // callers are contractually Mut-No. 
if len(chain) == 0 && fromCache { - return baseType, bytes.Clone(base), nil + return baseType, base, nil } // Apply deltas back up the chain, caching each consumed base. @@ -202,16 +213,19 @@ func (packed *Packed) resolveType(p *pack, offset int, entryHeader packfile.Entr // deltaResultSize reads the declared result size // from one compressed delta payload prefix. func deltaResultSize(payload []byte, deltaSize uint64) (int, error) { - zr, err := zlib.NewReader(bytes.NewReader(payload)) + zr, err := zlib.NewReaderBytes(payload) if err != nil { return 0, fmt.Errorf("reading delta header: %w", err) } defer func() { _ = zr.Close() }() + buf := deltaHeaderPool.Get() + defer deltaHeaderPool.Put(buf) + prefixLen := min(uint64(delta.MaxHeaderSizesLen), deltaSize) - prefix := make([]byte, prefixLen) + prefix := buf[:prefixLen] _, err = io.ReadFull(zr, prefix) if err != nil { diff --git a/object/store/packed/entry.go b/object/store/packed/entry.go index e9d45bb4..908afad0 100644 --- a/object/store/packed/entry.go +++ b/object/store/packed/entry.go @@ -1,7 +1,6 @@ package packed import ( - "bytes" "errors" "fmt" "io" @@ -49,7 +48,7 @@ func inflate(payload []byte, expectedSize uint64) ([]byte, error) { return nil, fmt.Errorf("declared size: %w", err) } - zr, err := zlib.NewReader(bytes.NewReader(payload)) + zr, err := zlib.NewReaderBytes(payload) if err != nil { return nil, fmt.Errorf("inflating entry payload: %w", err) } diff --git a/object/store/packed/internal/ingest/basecache.go b/object/store/packed/internal/ingest/basecache.go new file mode 100644 index 00000000..77419aa7 --- /dev/null +++ b/object/store/packed/internal/ingest/basecache.go @@ -0,0 +1,25 @@ +package ingest + +import ( + "lindenii.org/go/furgit/internal/cache/clock" + "lindenii.org/go/furgit/object/typ" +) + +const baseCacheMaxWeight = 96 << 20 + +type baseCacheKey struct { + offset int +} + +type cachedContent struct { + objectType typ.Type + content []byte +} + +func newBaseCache(workers int) 
*clock.Clock[baseCacheKey, cachedContent] { + return clock.New(baseCacheMaxWeight*uint64(workers), baseContentWeight) //#nosec G115 +} + +func baseContentWeight(_ baseCacheKey, base cachedContent) uint64 { + return uint64(len(base.content)) + 32 +} diff --git a/object/store/packed/internal/ingest/finalize.go b/object/store/packed/internal/ingest/finalize.go index f0ab6622..c6b1e2c9 100644 --- a/object/store/packed/internal/ingest/finalize.go +++ b/object/store/packed/internal/ingest/finalize.go @@ -8,6 +8,7 @@ import ( "slices" "lindenii.org/go/furgit/internal/format/packidx" + "lindenii.org/go/furgit/internal/format/packidx/bloom" "lindenii.org/go/furgit/internal/format/packrev" "lindenii.org/go/furgit/object/id" "lindenii.org/go/lgo/intconv" @@ -17,6 +18,11 @@ import ( // then links the pack, reverse index, and index // to their content-addressed names. func (ingestion *ingestion) finalize() (Result, error) { + err := ingestion.ctx.Err() + if err != nil { + return Result{}, fmt.Errorf("object/store/packed/internal/ingest: %w", err) + } + entries, positions, err := ingestion.indexEntries() if err != nil { return Result{}, err @@ -38,24 +44,16 @@ func (ingestion *ingestion) finalize() (Result, error) { return Result{}, err } - base := "pack-" + ingestion.packHash.String() - packFinal := base + ".pack" - idxFinal := base + ".idx" - revFinal := base + ".rev" - - // Link the pack and reverse index before the index, - // since the index is what publishes the pack to readers. 
- err = ingestion.link(ingestion.packTmp, packFinal) + bloomBuilder, err := ingestion.buildBloom(entries, packHash) if err != nil { return Result{}, err } - err = ingestion.link(revTmp, revFinal) - if err != nil { - return Result{}, err - } + bloomTmp, err := ingestion.writeTemp("tmp_bloom_", func(w io.Writer) error { + _, err := w.Write(bloomBuilder.Bytes()) - err = ingestion.link(idxTmp, idxFinal) + return err + }) if err != nil { return Result{}, err } @@ -65,16 +63,70 @@ func (ingestion *ingestion) finalize() (Result, error) { return Result{}, fmt.Errorf("object/store/packed/internal/ingest: %w", err) } + base := "pack-" + ingestion.packHash.String() + packFinal := base + ".pack" + idxFinal := base + ".idx" + revFinal := base + ".rev" + bloomFinal := base + ".bloom" + + // Link the data files before the index, + // since the index is what publishes the pack to readers. + artifacts := [...]struct{ tmp, final string }{ + {ingestion.packTmp, packFinal}, + {revTmp, revFinal}, + {bloomTmp, bloomFinal}, + {idxTmp, idxFinal}, + } + + var created []string + + for _, artifact := range artifacts { + linked, err := ingestion.promote(artifact.tmp, artifact.final) + if err != nil { + for i := len(created) - 1; i >= 0; i-- { + _ = ingestion.root.Remove(created[i]) + } + + return Result{}, err + } + + if linked { + created = append(created, artifact.final) + } + } + return Result{ PackName: packFinal, IdxName: idxFinal, RevName: revFinal, + BloomName: bloomFinal, PackHash: ingestion.packHash, ObjectCount: objectCount, ThinFixed: ingestion.thinFixed, }, nil } +// buildBloom builds a Bloom filter over the index entries' object IDs, +// bound to packHash. 
+func (ingestion *ingestion) buildBloom(entries []packidx.Entry, packHash []byte) (*bloom.Builder, error) { + bucketCount, k, err := bloom.RecommendParams(ingestion.objectFormat, len(entries)) + if err != nil { + return nil, fmt.Errorf("object/store/packed/internal/ingest: %w", err) + } + + builder, err := bloom.NewBuilder(ingestion.objectFormat, bucketCount, k, packHash) + if err != nil { + return nil, fmt.Errorf("object/store/packed/internal/ingest: %w", err) + } + + size := ingestion.objectFormat.Size() + for i := range entries { + builder.Add(entries[i].OID[:size]) + } + + return builder, nil +} + // indexEntries returns the index entries in object-ID order // and, for each record in pack order, its position in that index order. func (ingestion *ingestion) indexEntries() ([]packidx.Entry, []uint32, error) { @@ -141,15 +193,21 @@ func (ingestion *ingestion) writeTemp(prefix string, write func(io.Writer) error return name, nil } -// link hard-links tmp to final, -// treating an already-present destination as success. -func (ingestion *ingestion) link(tmp, final string) error { +// promote hard-links tmp to final and reports whether final was newly created. +// A pre-existing final is treated as success; rollback must not remove it. 
+func (ingestion *ingestion) promote(tmp, final string) (bool, error) { err := ingestion.root.Link(tmp, final) - if err != nil && !errors.Is(err, fs.ErrExist) { - return fmt.Errorf("object/store/packed/internal/ingest: linking %q: %w", final, err) - } - _ = ingestion.root.Remove(tmp) + switch { + case err == nil: + _ = ingestion.root.Remove(tmp) - return nil + return true, nil + case errors.Is(err, fs.ErrExist): + _ = ingestion.root.Remove(tmp) + + return false, nil + default: + return false, fmt.Errorf("object/store/packed/internal/ingest: linking %q: %w", final, err) + } } diff --git a/object/store/packed/internal/ingest/ingest.go b/object/store/packed/internal/ingest/ingest.go index 5422b4af..9b60af85 100644 --- a/object/store/packed/internal/ingest/ingest.go +++ b/object/store/packed/internal/ingest/ingest.go @@ -2,22 +2,29 @@ package ingest import ( "bytes" + "context" "crypto/rand" "errors" "fmt" "io" "io/fs" "os" + "runtime" + "sync/atomic" + "lindenii.org/go/furgit/internal/cache/clock" "lindenii.org/go/furgit/internal/format/packfile" "lindenii.org/go/furgit/object/id" "lindenii.org/go/furgit/object/store" + "lindenii.org/go/lgo/sync" ) var errTempNamesExhausted = errors.New("object/store/packed/internal/ingest: exhausted temporary file names") // ingestion holds the state for one WritePack call. type ingestion struct { + ctx context.Context //nolint:containedctx + // root is the destination objects/pack directory. root *os.Root @@ -47,7 +54,12 @@ type ingestion struct { // byOffset maps an entry offset to its record index, // and byOID maps a resolved object ID to its record index. byOffset map[int]int - byOID map[id.ObjectID]int + byOID sync.Map[id.ObjectID, int] + + baseCache *clock.Clock[baseCacheKey, cachedContent] + + // workers is the delta-resolution concurrency. + workers int // headerCount is the object count declared by the pack header. 
headerCount int @@ -55,8 +67,8 @@ type ingestion struct { // deltaCount counts delta records, accumulated during scanning. deltaCount int - // deltasResolved counts resolved delta records, for progress. - deltasResolved int + // deltasResolved counts resolved delta records. + deltasResolved atomic.Int64 // packHash is the final pack trailer hash. packHash id.ObjectID @@ -81,7 +93,7 @@ type ingestion struct { // The pack must be the last thing the peer sends before that response: // any bytes arriving immediately after the trailer // are rejected as a malformed pack. -func WritePack(root *os.Root, objectFormat id.ObjectFormat, src io.Reader, opts store.PackWriteOptions) (Result, error) { +func WritePack(ctx context.Context, root *os.Root, objectFormat id.ObjectFormat, src io.Reader, opts store.PackWriteOptions) (Result, error) { if objectFormat.Size() == 0 { return Result{}, id.ErrInvalidObjectFormat } @@ -91,24 +103,27 @@ func WritePack(root *os.Root, objectFormat id.ObjectFormat, src io.Reader, opts return Result{}, err } + workers := runtime.GOMAXPROCS(0) + ingestion := &ingestion{ - root: root, - objectFormat: objectFormat, - opts: opts, - src: src, - packFile: nil, - packTmp: "", - temps: nil, - scanner: nil, - records: nil, - byOffset: make(map[int]int), - byOID: make(map[id.ObjectID]int), - headerCount: count, - deltaCount: 0, - deltasResolved: 0, - packHash: id.ObjectID{}, - thinFixed: false, - committed: false, + ctx: ctx, + root: root, + objectFormat: objectFormat, + opts: opts, + src: src, + packFile: nil, + packTmp: "", + temps: nil, + scanner: nil, + records: nil, + byOffset: make(map[int]int), + baseCache: newBaseCache(workers), + workers: workers, + headerCount: count, + deltaCount: 0, + packHash: id.ObjectID{}, + thinFixed: false, + committed: false, } defer ingestion.cleanup() diff --git a/object/store/packed/internal/ingest/record.go b/object/store/packed/internal/ingest/record.go index 69101293..4031a246 100644 --- 
a/object/store/packed/internal/ingest/record.go +++ b/object/store/packed/internal/ingest/record.go @@ -37,15 +37,11 @@ type record struct { // baseOID is the base object ID for a ref-delta. baseOID id.ObjectID - // objectType is the resolved object type, - // meaningful once resolved is true. - objectType packfile.EntryType - // oid is the resolved object ID, // meaningful once resolved is true. oid id.ObjectID - // resolved reports whether oid and objectType are final. + // resolved reports whether oid is final. resolved bool } diff --git a/object/store/packed/internal/ingest/resolve.go b/object/store/packed/internal/ingest/resolve.go index 8595d366..dd26cd47 100644 --- a/object/store/packed/internal/ingest/resolve.go +++ b/object/store/packed/internal/ingest/resolve.go @@ -3,6 +3,7 @@ package ingest import ( "fmt" "io" + "sync" "lindenii.org/go/furgit/internal/compress/zlib" "lindenii.org/go/furgit/internal/format/packfile" @@ -10,6 +11,8 @@ import ( "lindenii.org/go/furgit/internal/progress" "lindenii.org/go/furgit/object/header" "lindenii.org/go/furgit/object/id" + "lindenii.org/go/furgit/object/store" + "lindenii.org/go/furgit/object/typ" ) // adjacency maps each resolvable base to its delta children: @@ -83,108 +86,311 @@ func (ingestion *ingestion) buildAdjacency() adjacency { return out } -// resolveFrom resolves the delta subtree rooted at each resolved record. +// item is a delta record awaiting resolution, with its delta-chain depth. +type item struct { + index int + depth int +} + +// resolver resolves deltas concurrently over a shared LIFO work stack. +// +// Each item is one delta child; +// a worker materializes its base from the cache, re-deriving on a miss, +// resolves the child, +// and pushes the child's own delta children. +// Workers park while the stack is empty but others are still working, +// and exit once it is empty and none are. 
+type resolver struct { + ingestion *ingestion + adjacency adjacency + meter *progress.Meter + + mu sync.Mutex + cond *sync.Cond + stack []item + active int + firstErr error +} + func (ingestion *ingestion) resolveFrom(roots []int, adjacency adjacency, meter *progress.Meter) error { + var seed []item + for _, root := range roots { - content, err := ingestion.inflateRecord(root) - if err != nil { - return err + rec := &ingestion.records[root] + for _, group := range [2][]int{adjacency.byOffset[rec.offset], adjacency.byOID[rec.oid]} { + for _, child := range group { + seed = append(seed, item{index: child, depth: 1}) + } } + } - err = ingestion.resolveSubtree(root, content, ingestion.records[root].objectType, 0, adjacency, meter) - if err != nil { - return err - } + if len(seed) == 0 { + return nil } - return nil + res := &resolver{ + ingestion: ingestion, + adjacency: adjacency, + meter: meter, + stack: seed, + } + res.cond = sync.NewCond(&res.mu) + + return res.run(ingestion.workers) } -// resolveSubtree resolves every delta child of one resolved record at depth, -// holding the record's content as the base for its children. 
-func (ingestion *ingestion) resolveSubtree( - index int, - content []byte, - objectType packfile.EntryType, - depth int, - adjacency adjacency, - meter *progress.Meter, -) error { - rec := &ingestion.records[index] +func (res *resolver) run(workers int) error { + if workers <= 1 { + res.worker() - for _, child := range adjacency.byOffset[rec.offset] { - err := ingestion.resolveChild(child, content, objectType, depth+1, adjacency, meter) - if err != nil { - return err + return res.firstErr + } + + var wg sync.WaitGroup + + for range workers { + wg.Go(func() { + res.worker() + }) + } + + wg.Wait() + + return res.firstErr +} + +func (res *resolver) worker() { + for { + res.mu.Lock() + + for len(res.stack) == 0 && res.active > 0 && res.firstErr == nil { + res.cond.Wait() + } + + if res.firstErr != nil || len(res.stack) == 0 { + res.mu.Unlock() + + return + } + + it := res.stack[len(res.stack)-1] + res.stack = res.stack[:len(res.stack)-1] + res.active++ + res.mu.Unlock() + + kids, err := res.process(it) + + res.mu.Lock() + res.active-- + + if err != nil && res.firstErr == nil { + res.firstErr = err + } + + if res.firstErr == nil { + res.stack = append(res.stack, kids...) } + + if res.firstErr != nil || len(kids) > 0 || (res.active == 0 && len(res.stack) == 0) { + res.cond.Broadcast() + } + + res.mu.Unlock() } +} - for _, child := range adjacency.byOID[rec.oid] { - err := ingestion.resolveChild(child, content, objectType, depth+1, adjacency, meter) - if err != nil { - return err +// process resolves one delta child and returns its own delta children. 
+func (res *resolver) process(it item) ([]item, error) { + err := res.ingestion.ctx.Err() + if err != nil { + return nil, fmt.Errorf("object/store/packed/internal/ingest: %w", err) + } + + rec := &res.ingestion.records[it.index] + + parent, ok := res.ingestion.baseRecordIndex(rec) + if !ok { + return nil, fmt.Errorf("%w: entry at %d: base unavailable while resolving", ErrMalformedPack, rec.offset) + } + + baseType, baseContent, err := res.ingestion.materialize(parent) + if err != nil { + return nil, err + } + + err = res.ingestion.resolveOneChild(it.index, baseType, baseContent, res.meter) + if err != nil { + return nil, err + } + + return res.childItems(it.index, it.depth+1) +} + +// childItems returns the delta children of a just-resolved record at depth. +func (res *resolver) childItems(index, depth int) ([]item, error) { + rec := &res.ingestion.records[index] + + var kids []item + + for _, group := range [2][]int{res.adjacency.byOffset[rec.offset], res.adjacency.byOID[rec.oid]} { + for _, child := range group { + if depth > delta.MaxChainDepth { + return nil, fmt.Errorf("%w: entry at %d: delta chain too deep", ErrMalformedPack, res.ingestion.records[child].offset) + } + + kids = append(kids, item{index: child, depth: depth}) } } - return nil + return kids, nil } -// resolveChild applies one delta record at depth against its base content, -// finalizes the record, and recurses into its own children. 
-func (ingestion *ingestion) resolveChild( - index int, - baseContent []byte, - baseType packfile.EntryType, - depth int, - adjacency adjacency, - meter *progress.Meter, -) error { +func (ingestion *ingestion) resolveOneChild(index int, baseType typ.Type, baseContent []byte, meter *progress.Meter) error { rec := &ingestion.records[index] - if rec.resolved { - return nil + + content, err := ingestion.applyDelta(index, baseContent) + if err != nil { + return err + } + + oid, err := ingestion.hashObject(baseType, content) + if err != nil { + return err + } + + rec.oid = oid + rec.resolved = true + + ingestion.byOID.Store(oid, index) + ingestion.baseCache.Add(baseCacheKey{offset: rec.offset}, cachedContent{objectType: baseType, content: content}) + + ingestion.deltasResolved.Add(1) + meter.Add(1, 0) + + return nil +} + +// materialize returns the inflated content of an already-resolved record, +// from the base cache, +// or re-derived from the nearest cached or base ancestor on a miss. +func (ingestion *ingestion) materialize(index int) (typ.Type, []byte, error) { + var ( + zero typ.Type + chain []int + base []byte + baseType typ.Type + ) + + cur := index + + for { + rec := &ingestion.records[cur] + + if cached, ok := ingestion.baseCache.Get(baseCacheKey{offset: rec.offset}); ok { + base = cached.content + baseType = cached.objectType + + break + } + + if rec.packedType.IsBase() { + objectType, err := rec.packedType.ObjectType() + if err != nil { + return zero, nil, fmt.Errorf("object/store/packed/internal/ingest: %w", err) + } + + content, err := ingestion.inflateRecord(cur) + if err != nil { + return zero, nil, err + } + + base = content + baseType = objectType + + break + } + + if len(chain) >= delta.MaxChainDepth { + return zero, nil, fmt.Errorf("%w: entry at %d: delta chain too deep", ErrMalformedPack, rec.offset) + } + + chain = append(chain, cur) + + next, ok := ingestion.baseRecordIndex(rec) + if !ok { + return zero, nil, fmt.Errorf("%w: entry at %d: base 
unavailable while reconstructing", ErrMalformedPack, rec.offset) + } + + cur = next } - if depth > delta.MaxChainDepth { - return fmt.Errorf("%w: entry at %d: delta chain too deep", ErrMalformedPack, rec.offset) + for i := len(chain) - 1; i >= 0; i-- { + content, err := ingestion.applyDelta(chain[i], base) + if err != nil { + return zero, nil, err + } + + ingestion.baseCache.Add(baseCacheKey{offset: ingestion.records[chain[i]].offset}, cachedContent{objectType: baseType, content: content}) + + base = content } + return baseType, base, nil +} + +func (ingestion *ingestion) applyDelta(index int, baseContent []byte) ([]byte, error) { + rec := &ingestion.records[index] + deltaPayload, err := ingestion.inflateRecord(index) if err != nil { - return err + return nil, err } baseSize, resultSize, _, err := delta.ParseHeaderSizes(deltaPayload) if err != nil { - return fmt.Errorf("%w: entry at %d: %w", ErrMalformedPack, rec.offset, err) + return nil, fmt.Errorf("%w: entry at %d: %w", ErrMalformedPack, rec.offset, err) + } + + limit := ingestion.opts.MaxObjectSize + if limit > 0 && resultSize > uint64(limit) { + return nil, fmt.Errorf("%w: entry at %d: result size %d exceeds limit %d", store.ErrObjectTooLarge, rec.offset, resultSize, limit) } if baseSize != uint64(len(baseContent)) { - return fmt.Errorf("%w: entry at %d: delta base size mismatch", ErrMalformedPack, rec.offset) + return nil, fmt.Errorf("%w: entry at %d: delta base size mismatch", ErrMalformedPack, rec.offset) } content, err := delta.Apply(baseContent, deltaPayload) if err != nil { - return fmt.Errorf("%w: entry at %d: %w", ErrMalformedPack, rec.offset, err) + return nil, fmt.Errorf("%w: entry at %d: %w", ErrMalformedPack, rec.offset, err) } if uint64(len(content)) != resultSize { - return fmt.Errorf("%w: entry at %d: delta result size mismatch", ErrMalformedPack, rec.offset) + return nil, fmt.Errorf("%w: entry at %d: delta result size mismatch", ErrMalformedPack, rec.offset) } - oid, err := 
ingestion.hashObject(baseType, content) - if err != nil { - return err - } + return content, nil +} - rec.objectType = baseType - rec.oid = oid - rec.resolved = true - ingestion.byOID[oid] = index +func (ingestion *ingestion) baseRecordIndex(rec *record) (int, bool) { + switch rec.packedType { + case packfile.EntryTypeOfsDelta: + index, ok := ingestion.byOffset[rec.baseOffset] + + return index, ok + case packfile.EntryTypeRefDelta: + index, ok := ingestion.byOID.Load(rec.baseOID) - ingestion.deltasResolved++ - meter.Set(ingestion.deltasResolved, 0) + return index, ok + case packfile.EntryTypeInvalid, + packfile.EntryTypeCommit, + packfile.EntryTypeTree, + packfile.EntryTypeBlob, + packfile.EntryTypeTag, + packfile.EntryTypeFuture: + } - return ingestion.resolveSubtree(index, content, baseType, depth, adjacency, meter) + return 0, false } // inflateRecord inflates one record's payload from the temporary pack file. @@ -213,20 +419,15 @@ func (ingestion *ingestion) inflateRecord(index int) ([]byte, error) { } // hashObject computes the object ID of one resolved object. -func (ingestion *ingestion) hashObject(objectType packfile.EntryType, content []byte) (id.ObjectID, error) { +func (ingestion *ingestion) hashObject(objectType typ.Type, content []byte) (id.ObjectID, error) { var zero id.ObjectID - ty, err := objectType.ObjectType() - if err != nil { - return zero, fmt.Errorf("object/store/packed/internal/ingest: %w", err) - } - hashImpl, err := ingestion.objectFormat.New() if err != nil { return zero, fmt.Errorf("object/store/packed/internal/ingest: %w", err) } - _, _ = hashImpl.Write(header.Append(nil, ty, len(content))) + _, _ = hashImpl.Write(header.Append(nil, objectType, len(content))) _, _ = hashImpl.Write(content) oid, err := ingestion.objectFormat.FromBytes(hashImpl.Sum(nil)) @@ -261,7 +462,7 @@ func (ingestion *ingestion) countDeltas() int { // so the unresolved records are exactly the unresolved deltas: // the delta records minus those already resolved. 
func (ingestion *ingestion) countUnresolved() int { - return ingestion.deltaCount - ingestion.deltasResolved + return ingestion.deltaCount - int(ingestion.deltasResolved.Load()) } // unresolvedExternalBases returns the unique base object IDs @@ -270,7 +471,7 @@ func (ingestion *ingestion) countUnresolved() int { func (ingestion *ingestion) unresolvedExternalBases() []id.ObjectID { seen := make(map[id.ObjectID]struct{}) - var out []id.ObjectID + out := make([]id.ObjectID, 0, ingestion.deltaCount-int(ingestion.deltasResolved.Load())) for index := range ingestion.records { rec := &ingestion.records[index] @@ -278,7 +479,7 @@ func (ingestion *ingestion) unresolvedExternalBases() []id.ObjectID { continue } - if _, ok := ingestion.byOID[rec.baseOID]; ok { + if _, ok := ingestion.byOID.Load(rec.baseOID); ok { continue } diff --git a/object/store/packed/internal/ingest/result.go b/object/store/packed/internal/ingest/result.go index 0ae5593a..9cd6ef1d 100644 --- a/object/store/packed/internal/ingest/result.go +++ b/object/store/packed/internal/ingest/result.go @@ -13,6 +13,9 @@ type Result struct { // RevName is the destination-relative name of the written reverse index. RevName string + // BloomName is the destination-relative name of the written Bloom filter. + BloomName string + // PackHash is the pack trailer hash // shared by the pack, index, and reverse index. 
PackHash id.ObjectID diff --git a/object/store/packed/internal/ingest/scan.go b/object/store/packed/internal/ingest/scan.go index 6b3b73b7..2cb5c135 100644 --- a/object/store/packed/internal/ingest/scan.go +++ b/object/store/packed/internal/ingest/scan.go @@ -13,6 +13,7 @@ import ( "lindenii.org/go/furgit/internal/progress" "lindenii.org/go/furgit/object/header" "lindenii.org/go/furgit/object/id" + "lindenii.org/go/furgit/object/store" "lindenii.org/go/lgo/intconv" ) @@ -294,13 +295,22 @@ func (ingestion *ingestion) streamAndScan() error { Throughput: true, }) - for done := range ingestion.headerCount { - err := ingestion.scanEntry(ingestion.scanner.consumed) + prevConsumed := ingestion.scanner.consumed + + for range ingestion.headerCount { + err := ingestion.ctx.Err() + if err != nil { + return fmt.Errorf("object/store/packed/internal/ingest: %w", err) + } + + err = ingestion.scanEntry(ingestion.scanner.consumed) if err != nil { return err } - meter.Set(done+1, ingestion.scanner.consumed) + consumed := ingestion.scanner.consumed + meter.Add(1, int64(consumed-prevConsumed)) + prevConsumed = consumed } meter.Stop("done") @@ -345,7 +355,6 @@ func (ingestion *ingestion) scanEntry(start int) error { rec.crc32 = ingestion.scanner.endCRC() if rec.packedType.IsBase() { - rec.objectType = rec.packedType rec.oid = oid rec.resolved = true } else { @@ -357,7 +366,7 @@ func (ingestion *ingestion) scanEntry(start int) error { ingestion.byOffset[rec.offset] = index if rec.resolved { - ingestion.byOID[rec.oid] = index + ingestion.byOID.Store(rec.oid, index) } return nil @@ -384,6 +393,11 @@ func (ingestion *ingestion) scanHeader(start int) (record, error) { return rec, fmt.Errorf("%w: entry at %d: declared size overflows int: %w", ErrMalformedPack, start, err) } + limit := ingestion.opts.MaxObjectSize + if limit > 0 && declaredSize > limit { + return rec, fmt.Errorf("%w: entry at %d: declared size %d exceeds limit %d", store.ErrObjectTooLarge, start, declaredSize, limit) + } + 
rec.packedType = entryHeader.Type rec.declaredSize = declaredSize rec.headerLen = entryHeader.HeaderLen diff --git a/object/store/packed/internal/ingest/thin.go b/object/store/packed/internal/ingest/thin.go index 8d1566e0..15773a56 100644 --- a/object/store/packed/internal/ingest/thin.go +++ b/object/store/packed/internal/ingest/thin.go @@ -33,9 +33,14 @@ func (ingestion *ingestion) fixThin(external []id.ObjectID, adjacency adjacency, // Drop the trailer from the write cursor. ingestion.scanner.consumed -= hashSize - var appended []int + appended := make([]int, 0, len(external)) for _, baseOID := range external { + err := ingestion.ctx.Err() + if err != nil { + return fmt.Errorf("object/store/packed/internal/ingest: %w", err) + } + ty, content, err := ingestion.opts.ThinBase.ReadBytesContent(baseOID) if errors.Is(err, store.ErrObjectNotFound) { continue @@ -86,7 +91,7 @@ func (ingestion *ingestion) appendBaseObject(objectID id.ObjectID, objectType ty return 0, fmt.Errorf("object/store/packed/internal/ingest: %w", err) } - computed, err := ingestion.hashObject(entryType, content) + computed, err := ingestion.hashObject(objectType, content) if err != nil { return 0, err } @@ -138,7 +143,6 @@ func (ingestion *ingestion) appendBaseObject(objectID id.ObjectID, objectType ty declaredSize: len(content), baseOffset: 0, baseOID: id.ObjectID{}, - objectType: entryType, oid: objectID, resolved: true, } @@ -146,7 +150,7 @@ func (ingestion *ingestion) appendBaseObject(objectID id.ObjectID, objectType ty index := len(ingestion.records) ingestion.records = append(ingestion.records, rec) ingestion.byOffset[start] = index - ingestion.byOID[objectID] = index + ingestion.byOID.Store(objectID, index) return index, nil } diff --git a/object/store/packed/internal/ingest/writepack_test.go b/object/store/packed/internal/ingest/writepack_test.go index 394d8f6e..b2f4d2b8 100644 --- a/object/store/packed/internal/ingest/writepack_test.go +++ 
b/object/store/packed/internal/ingest/writepack_test.go @@ -2,12 +2,15 @@ package ingest_test import ( "bytes" + "context" "errors" "io" "os" "path/filepath" "testing" + "lindenii.org/go/furgit/internal/format/packidx" + "lindenii.org/go/furgit/internal/format/packidx/bloom" "lindenii.org/go/furgit/internal/testgit" "lindenii.org/go/furgit/object/id" "lindenii.org/go/furgit/object/store" @@ -89,6 +92,81 @@ func TestWritePackMatchesGit(t *testing.T) { } } +// TestWritePackBloom verifies that ingesting a pack writes a Bloom filter +// that reports every object in the pack as present. +func TestWritePackBloom(t *testing.T) { + t.Parallel() + + for _, objectFormat := range id.SupportedObjectFormats() { + t.Run(objectFormat.String(), func(t *testing.T) { + t.Parallel() + + repo, err := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: objectFormat}) + if err != nil { + t.Fatalf("NewRepo: %v", err) + } + + seeded, err := repo.SeedHistory(t) + if err != nil { + t.Fatalf("SeedHistory: %v", err) + } + + gitPrefix, err := repo.PackObjects(t, seeded.All(), testgit.PackObjectsOptions{ + RevIndex: true, + Revs: false, + Exclude: nil, + }) + if err != nil { + t.Fatalf("PackObjects: %v", err) + } + + stream, err := os.ReadFile(gitPrefix + ".pack") //nolint:gosec + if err != nil { + t.Fatalf("ReadFile pack: %v", err) + } + + dir, result := writePack(t, objectFormat, bytes.NewReader(stream), store.PackWriteOptions{ + ThinBase: nil, + Progress: nil, + }) + + if result.BloomName == "" { + t.Fatal("BloomName is empty") + } + + bloomBytes, err := os.ReadFile(filepath.Join(dir, result.BloomName)) //nolint:gosec + if err != nil { + t.Fatalf("ReadFile bloom: %v", err) + } + + filter, err := bloom.Parse(bloomBytes, objectFormat) + if err != nil { + t.Fatalf("bloom.Parse: %v", err) + } + + idxBytes, err := os.ReadFile(filepath.Join(dir, result.IdxName)) //nolint:gosec + if err != nil { + t.Fatalf("ReadFile idx: %v", err) + } + + index, err := packidx.Parse(idxBytes, objectFormat.Size()) 
+ if err != nil { + t.Fatalf("packidx.Parse: %v", err) + } + + if !bytes.Equal(filter.PackHash(), index.PackHash()) { + t.Fatalf("filter pack hash %x, want %x", filter.PackHash(), index.PackHash()) + } + + for pos := range index.NumObjects() { + if !filter.MayContain(index.OIDAt(pos)) { + t.Fatalf("filter rejects object at index position %d", pos) + } + } + }) + } +} + // TestWritePackEmpty verifies that a zero-object pack // succeeds without writing any artifacts. func TestWritePackEmpty(t *testing.T) { @@ -179,7 +257,7 @@ func TestWritePackIdempotent(t *testing.T) { t.Cleanup(func() { _ = root.Close() }) - first, err := ingest.WritePack(root, objectFormat, bytes.NewReader(stream), store.PackWriteOptions{ + first, err := ingest.WritePack(t.Context(), root, objectFormat, bytes.NewReader(stream), store.PackWriteOptions{ ThinBase: nil, Progress: nil, }) @@ -187,7 +265,7 @@ func TestWritePackIdempotent(t *testing.T) { t.Fatalf("first WritePack: %v", err) } - second, err := ingest.WritePack(root, objectFormat, bytes.NewReader(stream), store.PackWriteOptions{ + second, err := ingest.WritePack(t.Context(), root, objectFormat, bytes.NewReader(stream), store.PackWriteOptions{ ThinBase: nil, Progress: nil, }) @@ -228,7 +306,7 @@ func writePack( t.Cleanup(func() { _ = root.Close() }) - result, err := ingest.WritePack(root, objectFormat, src, opts) + result, err := ingest.WritePack(t.Context(), root, objectFormat, src, opts) if err != nil { t.Fatalf("WritePack: %v", err) } @@ -278,7 +356,7 @@ func TestWritePackThinWithoutBase(t *testing.T) { repo, seeded := seedHistory(t, objectFormat) stream := thinStream(t, repo, seeded) - _, err := ingest.WritePack(freshRoot(t), objectFormat, bytes.NewReader(stream), store.PackWriteOptions{ + _, err := ingest.WritePack(t.Context(), freshRoot(t), objectFormat, bytes.NewReader(stream), store.PackWriteOptions{ ThinBase: nil, Progress: nil, }) @@ -304,7 +382,7 @@ func TestWritePackThinMissingBase(t *testing.T) { emptyBase := emptyStore(t, 
objectFormat) stream := thinStream(t, repo, seeded) - _, err := ingest.WritePack(freshRoot(t), objectFormat, bytes.NewReader(stream), store.PackWriteOptions{ + _, err := ingest.WritePack(t.Context(), freshRoot(t), objectFormat, bytes.NewReader(stream), store.PackWriteOptions{ ThinBase: emptyBase, Progress: nil, }) @@ -321,6 +399,118 @@ func TestWritePackThinMissingBase(t *testing.T) { } } +// TestWritePackContextCancelled verifies that a cancelled context +// aborts ingestion and publishes no artifacts. +func TestWritePackContextCancelled(t *testing.T) { + t.Parallel() + + for _, objectFormat := range id.SupportedObjectFormats() { + t.Run(objectFormat.String(), func(t *testing.T) { + t.Parallel() + + repo, seeded := seedHistory(t, objectFormat) + + gitPrefix, err := repo.PackObjects(t, seeded.All(), testgit.PackObjectsOptions{ + RevIndex: false, + Revs: false, + Exclude: nil, + }) + if err != nil { + t.Fatalf("PackObjects: %v", err) + } + + stream, err := os.ReadFile(gitPrefix + ".pack") //nolint:gosec + if err != nil { + t.Fatalf("ReadFile pack: %v", err) + } + + ctx, cancel := context.WithCancel(t.Context()) + cancel() + + dir := t.TempDir() + + root, err := os.OpenRoot(dir) + if err != nil { + t.Fatalf("OpenRoot: %v", err) + } + + t.Cleanup(func() { _ = root.Close() }) + + _, err = ingest.WritePack(ctx, root, objectFormat, bytes.NewReader(stream), store.PackWriteOptions{ + ThinBase: nil, + Progress: nil, + }) + if !errors.Is(err, context.Canceled) { + t.Fatalf("err = %v, want context.Canceled", err) + } + + entries, err := os.ReadDir(dir) + if err != nil { + t.Fatalf("ReadDir: %v", err) + } + + if len(entries) != 0 { + t.Fatalf("cancelled ingestion left %d files behind", len(entries)) + } + }) + } +} + +// TestWritePackObjectTooLarge verifies that an object exceeding MaxObjectSize +// is rejected and no artifacts are published. 
+func TestWritePackObjectTooLarge(t *testing.T) { + t.Parallel() + + for _, objectFormat := range id.SupportedObjectFormats() { + t.Run(objectFormat.String(), func(t *testing.T) { + t.Parallel() + + repo, seeded := seedHistory(t, objectFormat) + + gitPrefix, err := repo.PackObjects(t, seeded.All(), testgit.PackObjectsOptions{ + RevIndex: false, + Revs: false, + Exclude: nil, + }) + if err != nil { + t.Fatalf("PackObjects: %v", err) + } + + stream, err := os.ReadFile(gitPrefix + ".pack") //nolint:gosec + if err != nil { + t.Fatalf("ReadFile pack: %v", err) + } + + dir := t.TempDir() + + root, err := os.OpenRoot(dir) + if err != nil { + t.Fatalf("OpenRoot: %v", err) + } + + t.Cleanup(func() { _ = root.Close() }) + + _, err = ingest.WritePack(t.Context(), root, objectFormat, bytes.NewReader(stream), store.PackWriteOptions{ + ThinBase: nil, + Progress: nil, + MaxObjectSize: 1, + }) + if !errors.Is(err, store.ErrObjectTooLarge) { + t.Fatalf("err = %v, want ErrObjectTooLarge", err) + } + + entries, err := os.ReadDir(dir) + if err != nil { + t.Fatalf("ReadDir: %v", err) + } + + if len(entries) != 0 { + t.Fatalf("rejected ingestion left %d files behind", len(entries)) + } + }) + } +} + // seedHistory creates one repository with a seeded history. 
func seedHistory(t *testing.T, objectFormat id.ObjectFormat) (*testgit.Repo, testgit.Seeded) { t.Helper() diff --git a/object/store/packed/lookup.go b/object/store/packed/lookup.go index e54d34b2..e06870a9 100644 --- a/object/store/packed/lookup.go +++ b/object/store/packed/lookup.go @@ -24,6 +24,10 @@ func (packed *Packed) lookup(objectID id.ObjectID) (*pack, int, error) { oid := objectID.RawBytes() for _, p := range packed.order.Keys() { + if p.filter != nil && !p.filter.MayContain(oid) { + continue + } + offsetU, found, err := p.idx.Lookup(oid) if err != nil { return nil, 0, fmt.Errorf("%w: pack %q: %w", ErrMalformedPackedStore, p.name, err) diff --git a/object/store/packed/pack.go b/object/store/packed/pack.go index dd43bc7a..9cd6162b 100644 --- a/object/store/packed/pack.go +++ b/object/store/packed/pack.go @@ -8,6 +8,7 @@ import ( "lindenii.org/go/furgit/internal/format/packfile" "lindenii.org/go/furgit/internal/format/packidx" + "lindenii.org/go/furgit/internal/format/packidx/bloom" "lindenii.org/go/furgit/internal/mmap" "lindenii.org/go/furgit/object/id" "lindenii.org/go/lgo/intconv" @@ -36,6 +37,9 @@ type pack struct { // and data aliases them. 
dataMapping *mmap.Mmap data []byte + + bloomMapping *mmap.Mmap + filter *bloom.Bloom } // openPack opens, maps, and validates @@ -69,15 +73,41 @@ func openPack(root *os.Root, name string, objectFormat id.ObjectFormat) (*pack, return nil, fmt.Errorf("%w: pack %q: %w", ErrMalformedPackedStore, name, err) } + bloomMapping, filter := openBloom(root, name, objectFormat, idx.PackHash()) + return &pack{ - name: name, - idxMapping: idxMapping, - idx: idx, - dataMapping: dataMapping, - data: dataMapping.Data(), + name: name, + idxMapping: idxMapping, + idx: idx, + dataMapping: dataMapping, + data: dataMapping.Data(), + bloomMapping: bloomMapping, + filter: filter, }, nil } +func openBloom(root *os.Root, name string, objectFormat id.ObjectFormat, packHash []byte) (*mmap.Mmap, *bloom.Bloom) { + mapping, err := mapFile(root, name+".bloom") + if err != nil { + return nil, nil + } + + filter, err := bloom.Parse(mapping.Data(), objectFormat) + if err != nil { + _ = mapping.Close() + + return nil, nil + } + + if !bytes.Equal(filter.PackHash(), packHash) { + _ = mapping.Close() + + return nil, nil + } + + return mapping, &filter +} + // mapFile opens and maps one file under root. func mapFile(root *os.Root, name string) (*mmap.Mmap, error) { file, err := root.Open(name) @@ -125,10 +155,16 @@ func validatePackData(data []byte, idx *packidx.Packidx, hashSize int) error { return nil } -// close releases the pack data and index mappings. +// close releases the pack data, index, and filter mappings. func (pack *pack) close() error { - return errors.Join( + errs := []error{ pack.dataMapping.Close(), pack.idxMapping.Close(), - ) + } + + if pack.bloomMapping != nil { + errs = append(errs, pack.bloomMapping.Close()) + } + + return errors.Join(errs...) 
} diff --git a/object/store/packed/packed.go b/object/store/packed/packed.go index f22c2445..897b3b98 100644 --- a/object/store/packed/packed.go +++ b/object/store/packed/packed.go @@ -63,7 +63,7 @@ func New(root *os.Root, objectFormat id.ObjectFormat) (*Packed, error) { packed := &Packed{ root: root, objectFormat: objectFormat, - order: mru.New[*pack](), + order: mru.New[*pack](mru.Options{Interval: 48}), baseCache: newBaseCache(), refreshMu: sync.Mutex{}, byName: nil, diff --git a/object/store/packed/quarantine.go b/object/store/packed/quarantine.go index 5e0b85cb..6f6a8c18 100644 --- a/object/store/packed/quarantine.go +++ b/object/store/packed/quarantine.go @@ -95,29 +95,46 @@ func (quarantine *packQuarantine) promoteAll() error { return packPromotionPriority(left.Name()) - packPromotionPriority(right.Name()) }) + var created []string + for _, entry := range entries { - err := quarantine.promoteFile(entry.Name()) + linked, err := quarantine.promoteFile(entry.Name()) if err != nil { + for i := len(created) - 1; i >= 0; i-- { + _ = quarantine.parent.root.Remove(created[i]) + } + return err } + + if linked { + created = append(created, entry.Name()) + } } return nil } -// promoteFile links one quarantined artifact into the parent store, -// treating an already-present destination as success. -func (quarantine *packQuarantine) promoteFile(name string) error { +// promoteFile links one quarantined artifact into the parent store +// and reports whether the destination was newly created. +// A pre-existing destination is treated as success; rollback must not remove it. 
+func (quarantine *packQuarantine) promoteFile(name string) (bool, error) { src := quarantine.tempName + "/" + name err := quarantine.parent.root.Link(src, name) - if err != nil && !errors.Is(err, fs.ErrExist) { - return fmt.Errorf("object/store/packed: promoting %q: %w", name, err) - } - _ = quarantine.parent.root.Remove(src) + switch { + case err == nil: + _ = quarantine.parent.root.Remove(src) - return nil + return true, nil + case errors.Is(err, fs.ErrExist): + _ = quarantine.parent.root.Remove(src) + + return false, nil + default: + return false, fmt.Errorf("object/store/packed: promoting %q: %w", name, err) + } } // createPackQuarantineRoot creates a private quarantine directory beneath parent @@ -156,6 +173,8 @@ func packPromotionPriority(name string) int { return 1 case strings.HasPrefix(name, "pack-") && strings.HasSuffix(name, ".rev"): return 2 + case strings.HasPrefix(name, "pack-") && strings.HasSuffix(name, ".bloom"): + return 2 case strings.HasPrefix(name, "pack-") && strings.HasSuffix(name, ".idx"): return 3 default: diff --git a/object/store/packed/reader.go b/object/store/packed/reader.go index bfc82eff..cf433cfc 100644 --- a/object/store/packed/reader.go +++ b/object/store/packed/reader.go @@ -165,7 +165,7 @@ func (packed *Packed) ReadReaderContent(objectID id.ObjectID) (typ.Type, int, io return typ.Unknown, 0, nil, fmt.Errorf("%w: pack %q: object size overflows int: %w", ErrMalformedPackedStore, p.name, err) } - zr, err := zlib.NewReader(bytes.NewReader(payload)) + zr, err := zlib.NewReaderBytes(payload) if err != nil { return typ.Unknown, 0, nil, fmt.Errorf("%w: pack %q: %w", ErrMalformedPackedStore, p.name, err) } diff --git a/object/store/packed/refresh.go b/object/store/packed/refresh.go index 14c66013..f06e9859 100644 --- a/object/store/packed/refresh.go +++ b/object/store/packed/refresh.go @@ -23,7 +23,7 @@ func (packed *Packed) Refresh() error { next := make(map[string]*pack, len(packed.byName)) - var opened []*pack + opened := 
make([]*pack, 0, len(dirEntries)) for _, dirEntry := range dirEntries { name, ok := strings.CutSuffix(dirEntry.Name(), ".idx") diff --git a/object/store/packed/refresh_test.go b/object/store/packed/refresh_test.go index 025f6f62..e54dc97d 100644 --- a/object/store/packed/refresh_test.go +++ b/object/store/packed/refresh_test.go @@ -101,7 +101,7 @@ func cp(t *testing.T, src, dst string) { t.Fatalf("ReadFile: %v", err) } - err = os.WriteFile(dst, data, 0o600) //nolint:gosec + err = os.WriteFile(dst, data, 0o600) //#nosec G703 if err != nil { t.Fatalf("WriteFile: %v", err) } diff --git a/object/store/packed/writer.go b/object/store/packed/writer.go index 59309c24..6476cc42 100644 --- a/object/store/packed/writer.go +++ b/object/store/packed/writer.go @@ -1,6 +1,7 @@ package packed import ( + "context" "fmt" "io" @@ -23,8 +24,8 @@ var _ store.PackWriter = (*Packed)(nil) // The pack must be the last thing the peer sends before that response: // any bytes arriving immediately after the trailer // are rejected as a malformed pack. 
-func (packed *Packed) WritePack(src io.Reader, opts store.PackWriteOptions) error { - _, err := ingest.WritePack(packed.root, packed.objectFormat, src, opts) +func (packed *Packed) WritePack(ctx context.Context, src io.Reader, opts store.PackWriteOptions) error { + _, err := ingest.WritePack(ctx, packed.root, packed.objectFormat, src, opts) if err != nil { return err //nolint:wrapcheck } diff --git a/object/store/packed/writer_test.go b/object/store/packed/writer_test.go index 8227caa7..d668647b 100644 --- a/object/store/packed/writer_test.go +++ b/object/store/packed/writer_test.go @@ -42,7 +42,7 @@ func TestWritePack(t *testing.T) { packedStore := openEmptyStore(t, objectFormat) - err = packedStore.WritePack(bytes.NewReader(stream), store.PackWriteOptions{ + err = packedStore.WritePack(t.Context(), bytes.NewReader(stream), store.PackWriteOptions{ ThinBase: nil, Progress: nil, }) diff --git a/object/store/reader.go b/object/store/reader.go index 7979fb6c..bbfe1fe8 100644 --- a/object/store/reader.go +++ b/object/store/reader.go @@ -23,12 +23,21 @@ type ObjectReader interface { // Users should treat this as an invariant; // implementations should not re-verify it on every read. // - // Labels: Life-Parent. + // The returned slice may alias storage owned by the backend, + // such as a memory-mapped pack or a shared cache buffer. + // Callers must not mutate it + // and must not retain it past the backend's lifetime. + // + // Labels: Life-Parent, Mut-No. ReadBytesFull(id id.ObjectID) ([]byte, error) // ReadBytesContent reads an object's type and content bytes. // - // Labels: Life-Parent. + // The returned slice may alias backend-owned storage. + // Callers must not mutate it + // and must not retain it past the backend's lifetime. + // + // Labels: Life-Parent, Mut-No. 
ReadBytesContent(id id.ObjectID) (typ.Type, []byte, error) // ReadReaderFull reads a full serialized object stream diff --git a/object/store/writer.go b/object/store/writer.go index d83eec6a..0437505d 100644 --- a/object/store/writer.go +++ b/object/store/writer.go @@ -1,6 +1,7 @@ package store import ( + "context" "errors" "io" @@ -12,6 +13,10 @@ import ( // ErrInvalidObject indicates a malformed object passed to a write. var ErrInvalidObject = errors.New("object/store: invalid object") +// ErrObjectTooLarge indicates that an object exceeds +// the size limit configured for the write. +var ErrObjectTooLarge = errors.New("object/store: object too large") + // ObjectWriter writes individual Git objects. type ObjectWriter interface { // WriteBytesFull writes one full serialized object byte slice as "type size\x00content". @@ -32,7 +37,7 @@ type PackWriter interface { // WritePack ingests one pack stream, // such that the objects contained therein // become available in the relevant store. - WritePack(src io.Reader, opts PackWriteOptions) error + WritePack(ctx context.Context, src io.Reader, opts PackWriteOptions) error } // PackWriteOptions controls one pack write operation. @@ -65,4 +70,11 @@ type PackWriteOptions struct { // // When nil, no progress output is emitted. Progress iowrap.WriteFlusher + + // MaxObjectSize rejects ingestion of any object + // whose declared inflated size or delta result size exceeds it, + // bounding the memory spent reconstructing a single object. + // + // Zero or negative means no limit. + MaxObjectSize int } diff --git a/object/stored/stored.go b/object/stored/stored.go index 68a7bfd0..99c497ee 100644 --- a/object/stored/stored.go +++ b/object/stored/stored.go @@ -18,7 +18,7 @@ func New[T object.Object](id id.ObjectID, obj T) *Stored[T] { } // Object returns the wrapped object as itself. 
-func (stored *Stored[T]) Object() T { //nolint:ireturn +func (stored *Stored[T]) Object() T { return stored.obj } diff --git a/object/tag/append.go b/object/tag/append.go index 15a6fde9..2f524a73 100644 --- a/object/tag/append.go +++ b/object/tag/append.go @@ -27,7 +27,7 @@ func (tag *Tag) AppendWithoutHeader(dst []byte) ([]byte, error) { for _, h := range tag.ExtraHeaders { // GIGO on empty keys and such. - dst = append(dst, []byte(h.Key)...) + dst = append(dst, h.Key...) dst = append(dst, byte(' ')) dst = append(dst, h.Value...) dst = append(dst, byte('\n')) diff --git a/object/tag/clone.go b/object/tag/clone.go new file mode 100644 index 00000000..0f792bc1 --- /dev/null +++ b/object/tag/clone.go @@ -0,0 +1,29 @@ +package tag + +import "bytes" + +// Clone returns a deep copy of the tag +// whose byte fields are independent of any memory the original may alias. +// +// Labels: Life-Independent. +func (tag *Tag) Clone() *Tag { + clone := &Tag{ + TargetID: tag.TargetID, + TargetType: tag.TargetType, + Name: bytes.Clone(tag.Name), + Tagger: tag.Tagger.Clone(), + Message: bytes.Clone(tag.Message), + } + + if tag.ExtraHeaders != nil { + clone.ExtraHeaders = make([]ExtraHeader, len(tag.ExtraHeaders)) + for i, h := range tag.ExtraHeaders { + clone.ExtraHeaders[i] = ExtraHeader{ + Key: bytes.Clone(h.Key), + Value: bytes.Clone(h.Value), + } + } + } + + return clone +} diff --git a/object/tag/parse.go b/object/tag/parse.go index c5ea7e14..1fcc7c2c 100644 --- a/object/tag/parse.go +++ b/object/tag/parse.go @@ -15,6 +15,16 @@ import ( var ErrInvalidTag = errors.New("object/tag: invalid tag") // Parse decodes a tag object body. +// +// The returned tag aliases body: +// its Name, Message, and extra-header fields, +// along with the byte fields of its tagger signature, +// share body's backing array. +// The tag inherits body's lifetime +// and must not be mutated unless body may be. +// Use [Tag.Clone] for an independent copy. +// +// Labels: Life-Parent, Mut-No. 
func Parse(body []byte, objectFormat id.ObjectFormat) (*Tag, error) { t := new(Tag) @@ -56,7 +66,7 @@ func Parse(body []byte, objectFormat id.ObjectFormat) (*Tag, error) { return nil, fmt.Errorf("%w: tag name: %w", ErrInvalidTag, err) } - t.Name = append([]byte(nil), line...) + t.Name = line i = next line, next, err = requiredHeaderLine(body, i, "tagger") @@ -84,7 +94,7 @@ func Parse(body []byte, objectFormat id.ObjectFormat) (*Tag, error) { i += rel + 1 if len(line) == 0 { - t.Message = append([]byte(nil), body[i:]...) + t.Message = body[i:] return t, nil } @@ -112,8 +122,8 @@ func Parse(body []byte, objectFormat id.ObjectFormat) (*Tag, error) { } default: t.ExtraHeaders = append(t.ExtraHeaders, ExtraHeader{ - Key: string(key), - Value: append([]byte(nil), value...), + Key: key, + Value: value, }) } } diff --git a/object/tag/roundtrip_test.go b/object/tag/roundtrip_test.go index c49b3d75..cf4b69a1 100644 --- a/object/tag/roundtrip_test.go +++ b/object/tag/roundtrip_test.go @@ -42,8 +42,8 @@ func TestRoundTrip(t *testing.T) { }, Message: []byte("roundtrip subject\n\nroundtrip body\n\n"), ExtraHeaders: []tag.ExtraHeader{ - {Key: "encoding", Value: []byte("UTF-8")}, - {Key: "x-test-header", Value: []byte("value")}, + {Key: []byte("encoding"), Value: []byte("UTF-8")}, + {Key: []byte("x-test-header"), Value: []byte("value")}, }, } @@ -102,7 +102,7 @@ func assertTagEqual(t *testing.T, got *tag.Tag, want *tag.Tag) { } if !slices.EqualFunc(got.ExtraHeaders, want.ExtraHeaders, func(gotHeader tag.ExtraHeader, wantHeader tag.ExtraHeader) bool { - return gotHeader.Key == wantHeader.Key && bytes.Equal(gotHeader.Value, wantHeader.Value) + return bytes.Equal(gotHeader.Key, wantHeader.Key) && bytes.Equal(gotHeader.Value, wantHeader.Value) }) { t.Fatalf("extra headers = %+v, want %+v", got.ExtraHeaders, want.ExtraHeaders) } diff --git a/object/tag/tag.go b/object/tag/tag.go index f4b36c30..a4572921 100644 --- a/object/tag/tag.go +++ b/object/tag/tag.go @@ -20,6 +20,6 @@ type Tag 
struct { // ExtraHeader represents an extra header in a Git tag object. type ExtraHeader struct { - Key string + Key []byte Value []byte } diff --git a/object/tree/clone.go b/object/tree/clone.go new file mode 100644 index 00000000..d00c62f2 --- /dev/null +++ b/object/tree/clone.go @@ -0,0 +1,24 @@ +package tree + +import "bytes" + +// Clone returns a deep copy of the tree +// whose entry names are independent of any memory the original may alias. +// +// Labels: Life-Independent. +func (tree *Tree) Clone() *Tree { + if tree.entries == nil { + return &Tree{} + } + + clone := &Tree{entries: make([]Entry, len(tree.entries))} + for i, entry := range tree.entries { + clone.entries[i] = Entry{ + Mode: entry.Mode, + Name: bytes.Clone(entry.Name), + ID: entry.ID, + } + } + + return clone +} diff --git a/object/tree/compare.go b/object/tree/compare.go index 78bf56a4..9bf16f90 100644 --- a/object/tree/compare.go +++ b/object/tree/compare.go @@ -6,7 +6,7 @@ package tree // treating directory names as if they carried a trailing '/'. // entryIsTree and searchIsTree indicate // whether the respective names belong to subtree entries. 
-func nameCompare(entryName string, entryIsTree bool, searchName string, searchIsTree bool) int { +func nameCompare(entryName []byte, entryIsTree bool, searchName []byte, searchIsTree bool) int { entryLen := len(entryName) if entryIsTree { entryLen++ diff --git a/object/tree/helpers_test.go b/object/tree/helpers_test.go index d6cfddb6..3e5eddd4 100644 --- a/object/tree/helpers_test.go +++ b/object/tree/helpers_test.go @@ -43,26 +43,26 @@ func mixedEntries(tb testing.TB, repo *testgit.Repo) []tree.Entry { } return []tree.Entry{ - {Mode: mode.Regular, Name: "z", ID: blobA}, - {Mode: mode.Regular, Name: "A", ID: blobB}, - {Mode: mode.Regular, Name: "aa", ID: blobC}, - {Mode: mode.Regular, Name: "a0", ID: blobA}, - {Mode: mode.Regular, Name: "a.", ID: blobC}, - {Mode: mode.Regular, Name: "Z", ID: blobB}, - {Mode: mode.Regular, Name: "0", ID: blobA}, - {Mode: mode.Regular, Name: "CAPS", ID: blobB}, - {Mode: mode.Regular, Name: "caps", ID: blobC}, - {Mode: mode.Regular, Name: "name with space", ID: blobB}, - {Mode: mode.Regular, Name: "name.with.dot", ID: blobA}, - {Mode: mode.Regular, Name: "这是一些非 ASCII 的字符", ID: blobC}, - {Mode: mode.Regular, Name: "Emoji 👀", ID: blobC}, - {Mode: mode.Regular, Name: ".hidden", ID: blobA}, - {Mode: mode.Executable, Name: "exec.sh", ID: blobB}, - {Mode: mode.Symlink, Name: "sym.link", ID: blobC}, - {Mode: mode.Gitlink, Name: "submodule", ID: submodule}, - {Mode: mode.Regular, Name: "dir-", ID: blobA}, - {Mode: mode.Directory, Name: "dir", ID: subTree}, - {Mode: mode.Regular, Name: "dir0", ID: blobB}, + {Mode: mode.Regular, Name: []byte("z"), ID: blobA}, + {Mode: mode.Regular, Name: []byte("A"), ID: blobB}, + {Mode: mode.Regular, Name: []byte("aa"), ID: blobC}, + {Mode: mode.Regular, Name: []byte("a0"), ID: blobA}, + {Mode: mode.Regular, Name: []byte("a."), ID: blobC}, + {Mode: mode.Regular, Name: []byte("Z"), ID: blobB}, + {Mode: mode.Regular, Name: []byte("0"), ID: blobA}, + {Mode: mode.Regular, Name: []byte("CAPS"), ID: blobB}, + 
{Mode: mode.Regular, Name: []byte("caps"), ID: blobC}, + {Mode: mode.Regular, Name: []byte("name with space"), ID: blobB}, + {Mode: mode.Regular, Name: []byte("name.with.dot"), ID: blobA}, + {Mode: mode.Regular, Name: []byte("这是一些非 ASCII 的字符"), ID: blobC}, + {Mode: mode.Regular, Name: []byte("Emoji 👀"), ID: blobC}, + {Mode: mode.Regular, Name: []byte(".hidden"), ID: blobA}, + {Mode: mode.Executable, Name: []byte("exec.sh"), ID: blobB}, + {Mode: mode.Symlink, Name: []byte("sym.link"), ID: blobC}, + {Mode: mode.Gitlink, Name: []byte("submodule"), ID: submodule}, + {Mode: mode.Regular, Name: []byte("dir-"), ID: blobA}, + {Mode: mode.Directory, Name: []byte("dir"), ID: subTree}, + {Mode: mode.Regular, Name: []byte("dir0"), ID: blobB}, } } @@ -73,7 +73,7 @@ func mkTreeEntries(entries []tree.Entry) []testgit.TreeEntry { Mode: strconv.FormatUint(uint64(entry.Mode), 8), Type: entry.Mode.ObjectType(), OID: entry.ID, - Name: entry.Name, + Name: string(entry.Name), } } @@ -124,7 +124,7 @@ func assertGitDecode(tb testing.TB, repo *testgit.Repo, treeID id.ObjectID, got tb.Fatalf("entry[%d] id = %s, want %s", i, got[i].ID, want[i].OID) } - if got[i].Name != want[i].Name { + if string(got[i].Name) != want[i].Name { tb.Fatalf("entry[%d] name = %q, want %q", i, got[i].Name, want[i].Name) } } diff --git a/object/tree/insert.go b/object/tree/insert.go index 5e519069..b6c52400 100644 --- a/object/tree/insert.go +++ b/object/tree/insert.go @@ -1,10 +1,10 @@ package tree import ( + "bytes" "errors" "fmt" "slices" - "strings" "lindenii.org/go/furgit/object/tree/mode" ) @@ -42,16 +42,16 @@ func (tree *Tree) Insert(entry Entry) error { } // validateName checks that name is a structurally valid tree entry name. 
-func validateName(name string) error { - if name == "" { +func validateName(name []byte) error { + if len(name) == 0 { return fmt.Errorf("%w: empty entry name", ErrInvalidTree) } - if strings.IndexByte(name, 0) >= 0 { + if bytes.IndexByte(name, 0) >= 0 { return fmt.Errorf("%w: entry name %q contains NUL", ErrInvalidTree, name) } - if strings.IndexByte(name, '/') >= 0 { + if bytes.IndexByte(name, '/') >= 0 { return fmt.Errorf("%w: entry name %q contains '/'", ErrInvalidTree, name) } diff --git a/object/tree/insert_test.go b/object/tree/insert_test.go index fbf65b84..1dd406d5 100644 --- a/object/tree/insert_test.go +++ b/object/tree/insert_test.go @@ -18,10 +18,10 @@ func TestInsertRejects(t *testing.T) { name string entry tree.Entry }{ - {name: "empty-name", entry: tree.Entry{Mode: mode.Regular, Name: "", ID: zero}}, - {name: "slash-name", entry: tree.Entry{Mode: mode.Regular, Name: "a/b", ID: zero}}, - {name: "nul-name", entry: tree.Entry{Mode: mode.Regular, Name: "a\x00b", ID: zero}}, - {name: "invalid-mode", entry: tree.Entry{Mode: mode.Mode(0o100640), Name: "file", ID: zero}}, + {name: "empty-name", entry: tree.Entry{Mode: mode.Regular, Name: []byte(""), ID: zero}}, + {name: "slash-name", entry: tree.Entry{Mode: mode.Regular, Name: []byte("a/b"), ID: zero}}, + {name: "nul-name", entry: tree.Entry{Mode: mode.Regular, Name: []byte("a\x00b"), ID: zero}}, + {name: "invalid-mode", entry: tree.Entry{Mode: mode.Mode(0o100640), Name: []byte("file"), ID: zero}}, } { t.Run(tc.name, func(t *testing.T) { t.Parallel() @@ -48,18 +48,18 @@ func TestInsertRejectsConflict(t *testing.T) { }{ { name: "same-mode", - first: tree.Entry{Mode: mode.Regular, Name: "file", ID: zero}, - second: tree.Entry{Mode: mode.Regular, Name: "file", ID: zero}, + first: tree.Entry{Mode: mode.Regular, Name: []byte("file"), ID: zero}, + second: tree.Entry{Mode: mode.Regular, Name: []byte("file"), ID: zero}, }, { name: "blob-then-tree", - first: tree.Entry{Mode: mode.Regular, Name: "name", ID: zero}, - 
second: tree.Entry{Mode: mode.Directory, Name: "name", ID: zero}, + first: tree.Entry{Mode: mode.Regular, Name: []byte("name"), ID: zero}, + second: tree.Entry{Mode: mode.Directory, Name: []byte("name"), ID: zero}, }, { name: "tree-then-blob", - first: tree.Entry{Mode: mode.Directory, Name: "name", ID: zero}, - second: tree.Entry{Mode: mode.Regular, Name: "name", ID: zero}, + first: tree.Entry{Mode: mode.Directory, Name: []byte("name"), ID: zero}, + second: tree.Entry{Mode: mode.Regular, Name: []byte("name"), ID: zero}, }, } { t.Run(tc.name, func(t *testing.T) { @@ -79,24 +79,3 @@ func TestInsertRejectsConflict(t *testing.T) { }) } } - -func TestEntriesIsCopy(t *testing.T) { - t.Parallel() - - zero := id.SupportedObjectFormats()[0].Zero() - - var tr tree.Tree - - err := tr.Insert(tree.Entry{Mode: mode.Regular, Name: "file", ID: zero}) - if err != nil { - t.Fatalf("Insert: %v", err) - } - - entries := tr.Entries() - entries[0].Name = "mutated" - - again := tr.Entries() - if again[0].Name != "file" { - t.Fatalf("Entries()[0].Name = %q, want %q", again[0].Name, "file") - } -} diff --git a/object/tree/lookup.go b/object/tree/lookup.go index 34a01748..2ff6ce76 100644 --- a/object/tree/lookup.go +++ b/object/tree/lookup.go @@ -1,6 +1,7 @@ package tree import ( + "bytes" "slices" "lindenii.org/go/furgit/object/tree/mode" @@ -10,13 +11,18 @@ import ( // // A name matches whether stored as a blob-like or as a subtree entry, // so both orderings are searched. -// The returned entry is a copy; mutating it does not affect the tree. -func (tree *Tree) Find(name string) (Entry, bool) { +// +// The returned entry is a shallow copy: +// its Name aliases the tree's internal storage, +// so it must not be mutated and shares the tree's lifetime. +// +// Labels: Life-Parent, Mut-No. 
+func (tree *Tree) Find(name []byte) (Entry, bool) { for _, searchIsTree := range [...]bool{true, false} { - index, ok := slices.BinarySearchFunc(tree.entries, name, func(existing Entry, target string) int { + index, ok := slices.BinarySearchFunc(tree.entries, name, func(existing Entry, target []byte) int { return nameCompare(existing.Name, existing.Mode == mode.Directory, target, searchIsTree) }) - if ok && tree.entries[index].Name == name { + if ok && bytes.Equal(tree.entries[index].Name, name) { return tree.entries[index], true } } diff --git a/object/tree/lookup_test.go b/object/tree/lookup_test.go index 22d73615..706c1cd2 100644 --- a/object/tree/lookup_test.go +++ b/object/tree/lookup_test.go @@ -1,6 +1,7 @@ package tree_test import ( + "bytes" "testing" "lindenii.org/go/furgit/internal/testgit" @@ -28,12 +29,12 @@ func TestFind(t *testing.T) { t.Fatalf("Find(%q) not found", want.Name) } - if got.Mode != want.Mode || got.Name != want.Name || got.ID != want.ID { + if got.Mode != want.Mode || !bytes.Equal(got.Name, want.Name) || got.ID != want.ID { t.Fatalf("Find(%q) = %+v, want %+v", want.Name, got, want) } } - if _, ok := tr.Find("does-not-exist"); ok { + if _, ok := tr.Find([]byte("does-not-exist")); ok { t.Fatalf("Find(does-not-exist) = true, want false") } }) diff --git a/object/tree/malformed_test.go b/object/tree/malformed_test.go index ca00ea94..8a22b90f 100644 --- a/object/tree/malformed_test.go +++ b/object/tree/malformed_test.go @@ -44,6 +44,7 @@ func TestParseMalformed(t *testing.T) { {name: "unsorted", body: append(record("100644", "b", size), record("100644", "a", size)...)}, {name: "duplicate", body: append(record("100644", "a", size), record("100644", "a", size)...)}, {name: "conflicting-tree-blob", body: append(record("100644", "foo", size), record("40000", "foo", size)...)}, + {name: "conflicting-tree-blob-nonadjacent", body: append(append(record("100644", "foo", size), record("100644", "foo.c", size)...), record("40000", "foo", size)...)}, } 
{ t.Run(tc.name, func(t *testing.T) { t.Parallel() diff --git a/object/tree/parse.go b/object/tree/parse.go index 5b01fa05..bd6ed3b0 100644 --- a/object/tree/parse.go +++ b/object/tree/parse.go @@ -14,10 +14,22 @@ import ( // correctly sized object IDs, and strictly increasing Git tree order. // It does not enforce fsck-level name policy // (for example ".", "..", ".git", or platform-specific aliases). +// +// The returned tree aliases body: +// each entry's Name shares body's backing array. +// The tree inherits body's lifetime +// and must not be mutated unless body may be. +// Use [Tree.Clone] for an independent copy. +// +// Labels: Life-Parent, Mut-No. func Parse(body []byte, objectFormat id.ObjectFormat) (*Tree, error) { tree := new(Tree) idSize := objectFormat.Size() - seen := make(map[string]struct{}) + + const minEntryOverhead = 5 + 1 + 1 + 1 // mode, space, name, NUL + if estimate := len(body) / (minEntryOverhead + idSize); estimate > 0 { + tree.entries = make([]Entry, 0, estimate) + } i := 0 for i < len(body) { @@ -38,7 +50,7 @@ func Parse(body []byte, objectFormat id.ObjectFormat) (*Tree, error) { return nil, fmt.Errorf("%w: missing name terminator at offset %d", ErrInvalidTree, i) } - name := string(body[i : i+nul]) + name := body[i : i+nul] i += nul + 1 err = validateName(name) @@ -67,14 +79,44 @@ func Parse(body []byte, objectFormat id.ObjectFormat) (*Tree, error) { } } - if _, dup := seen[entry.Name]; dup { + if entryMode == mode.Directory && hasNonDirNamed(tree.entries, entry.Name) { return nil, fmt.Errorf("%w: duplicate entry name %q", ErrInvalidTree, entry.Name) } - seen[entry.Name] = struct{}{} - tree.entries = append(tree.entries, entry) } return tree, nil } + +// hasNonDirNamed reports whether entries, sorted in Git tree order, +// holds a non-directory entry whose name equals name. +// +// The match sorts immediately below a directory of the same name, +// so the search gallops from the back before binary searching the bracket. 
+func hasNonDirNamed(entries []Entry, name []byte) bool { + lo, hi := 0, len(entries) + + for stride := 1; stride < hi-lo; stride *= 2 { + mid := hi - stride + if nameCompare(entries[mid].Name, entries[mid].Mode == mode.Directory, name, false) < 0 { + lo = mid + 1 + + break + } + + hi = mid + } + + for lo < hi { + mid := lo + (hi-lo)/2 + if nameCompare(entries[mid].Name, entries[mid].Mode == mode.Directory, name, false) < 0 { + lo = mid + 1 + } else { + hi = mid + } + } + + return lo < len(entries) && + nameCompare(entries[lo].Name, entries[lo].Mode == mode.Directory, name, false) == 0 +} diff --git a/object/tree/roundtrip_test.go b/object/tree/roundtrip_test.go index 3e82c79a..a9d5f40f 100644 --- a/object/tree/roundtrip_test.go +++ b/object/tree/roundtrip_test.go @@ -70,7 +70,7 @@ func assertEntriesEqual(t *testing.T, got []tree.Entry, want []tree.Entry) { t.Fatalf("entry[%d] mode = %o, want %o", i, got[i].Mode, want[i].Mode) } - if got[i].Name != want[i].Name { + if !bytes.Equal(got[i].Name, want[i].Name) { t.Fatalf("entry[%d] name = %q, want %q", i, got[i].Name, want[i].Name) } diff --git a/object/tree/tree.go b/object/tree/tree.go index 431df649..f40bb165 100644 --- a/object/tree/tree.go +++ b/object/tree/tree.go @@ -1,8 +1,6 @@ package tree import ( - "slices" - "lindenii.org/go/furgit/object/id" "lindenii.org/go/furgit/object/tree/mode" ) @@ -21,15 +19,19 @@ type Tree struct { // Entry represents a single entry in a tree. type Entry struct { Mode mode.Mode - Name string + Name []byte ID id.ObjectID } -// Entries returns a copy of the tree's entries in Git tree order. +// Entries returns the tree's entries in Git tree order. // -// Mutating the returned slice does not affect the tree. +// The returned slice aliases the tree's internal storage, +// so it must not be mutated, +// and it is invalidated by any subsequent call that mutates the tree, +// such as [Tree.Insert]. +// Use [Tree.Clone] for an independent tree. // -// Labels: Life-Independent. 
+// Labels: Life-Parent, Mut-No. func (tree *Tree) Entries() []Entry { - return slices.Clone(tree.entries) + return tree.entries } diff --git a/ref/store/batch.go b/ref/store/batch.go new file mode 100644 index 00000000..dbe4c65b --- /dev/null +++ b/ref/store/batch.go @@ -0,0 +1,84 @@ +package store + +import "lindenii.org/go/furgit/object/id" + +// Batch stages reference operations for one non-atomic apply. +// +// Unlike Transaction, +// Batch may reject some queued operations +// while still applying others successfully when Apply runs. +// +// Labels: MT-Unsafe. +type Batch interface { + // Create creates one direct reference, + // requiring that the logical reference does not already exist. + Create(name string, newID id.ObjectID) error + + // Update updates one direct reference, + // requiring that the current logical reference value matches oldID. + Update(name string, newID, oldID id.ObjectID) error + + // Delete deletes one direct reference, + // requiring that the current logical reference value matches oldID. + Delete(name string, oldID id.ObjectID) error + + // Verify verifies that the current logical reference value matches oldID. + Verify(name string, oldID id.ObjectID) error + + // CreateSymbolic creates one symbolic reference, + // requiring that the named reference does not already exist. + CreateSymbolic(name, newTarget string) error + + // UpdateSymbolic updates one symbolic reference directly, + // requiring that its current target matches oldTarget. + UpdateSymbolic(name, newTarget, oldTarget string) error + + // DeleteSymbolic deletes one symbolic reference directly, + // requiring that its current target matches oldTarget. + DeleteSymbolic(name, oldTarget string) error + + // VerifySymbolic verifies that the named symbolic reference + // currently points at oldTarget. + VerifySymbolic(name, oldTarget string) error + + // Apply validates and applies queued operations, + // returning one result per queued operation in order. 
+ // Fatal backend failures are returned separately. + // + // Malformed operations are rejected by the queueing methods above + // and do not enter the batch. + // + // Apply invalidates the receiver. + Apply() ([]BatchResult, error) + + // Abort abandons the batch and releases any resources it holds. + // + // Abort invalidates the receiver. + Abort() error +} + +// BatchStatus reports the outcome for one queued batch operation. +type BatchStatus uint8 + +const ( + // BatchStatusApplied indicates that the operation was applied. + BatchStatusApplied BatchStatus = iota + + // BatchStatusRejected indicates that the operation was rejected + // without aborting the rest of the batch. + BatchStatusRejected + + // BatchStatusFatal indicates that the operation triggered a fatal failure. + BatchStatusFatal + + // BatchStatusNotAttempted indicates that the operation was not attempted + // because an earlier operation failed fatally. + BatchStatusNotAttempted +) + +// BatchResult reports the outcome for one queued batch operation. +type BatchResult struct { + Name string + Status BatchStatus + Error error //exhaustruct:optional +} diff --git a/ref/store/batch_store.go b/ref/store/batch_store.go new file mode 100644 index 00000000..16ca3d92 --- /dev/null +++ b/ref/store/batch_store.go @@ -0,0 +1,9 @@ +package store + +// Batcher begins non-atomic reference batches. +type Batcher interface { + // BeginBatch creates one new queued batch. + // + // Labels: Deps-Borrowed, Life-Parent. + BeginBatch() (Batch, error) +} diff --git a/ref/store/doc.go b/ref/store/doc.go new file mode 100644 index 00000000..d4fed3b1 --- /dev/null +++ b/ref/store/doc.go @@ -0,0 +1,18 @@ +// Package store provides interfaces for reference storage backends. +// +// Reference stores work directly with reference values, +// [ref.Direct] and [ref.Symbolic]. +// Unlike object storage, +// they have no separate fetch layer +// to parse backend results into higher-level forms. 
+// +// The package separates read-only access +// from atomic transactions and non-atomic batches. +// Not every readable reference backend is writable, +// and not every writable backend offers the same update model. +// +// Concrete implementations generally inherit the contract +// documented by the interfaces they satisfy. +// Implementation docs focus on additional guarantees +// and implementation-specific behavior. +package store diff --git a/ref/store/errors.go b/ref/store/errors.go new file mode 100644 index 00000000..64666ee6 --- /dev/null +++ b/ref/store/errors.go @@ -0,0 +1,77 @@ +package store + +import ( + "errors" + "fmt" + + "lindenii.org/go/furgit/object/id" +) + +// ErrReferenceNotFound indicates that a reference does not exist in a backend. +var ErrReferenceNotFound = errors.New("ref/store: reference not found") + +// ErrCreateExists indicates that a create operation +// targeted an already-existing reference. +var ErrCreateExists = errors.New("ref/store: reference already exists") + +// ErrDuplicateUpdate indicates that one transaction or batch +// includes a duplicate resolved update target. +var ErrDuplicateUpdate = errors.New("ref/store: duplicate reference update") + +// ErrExpectedDirect indicates that an operation required a direct reference +// but found a different kind. +var ErrExpectedDirect = errors.New("ref/store: expected direct reference") + +// ErrExpectedSymbolic indicates that an operation required a symbolic reference +// but found a different kind. +var ErrExpectedSymbolic = errors.New("ref/store: expected symbolic reference") + +// ErrInvalidValue indicates that a requested reference value is invalid, +// such as an empty symbolic target +// or an object ID whose format does not match the store. +var ErrInvalidValue = errors.New("ref/store: invalid reference value") + +// ErrSymbolicCycle indicates that resolving a symbolic reference +// encountered a cycle. 
+var ErrSymbolicCycle = errors.New("ref/store: symbolic reference cycle") + +// NameConflictError indicates that one reference name conflicts with another +// visible or queued reference name. +type NameConflictError struct { + Other string +} + +// Error implements error. +func (err *NameConflictError) Error() string { + return fmt.Sprintf("ref/store: reference name conflict with %q", err.Other) +} + +// WrongOldIDError indicates that a direct operation's expected old object ID +// did not match the current reference value. +type WrongOldIDError struct { + Actual id.ObjectID + Expected id.ObjectID +} + +// Error implements error. +func (err *WrongOldIDError) Error() string { + return fmt.Sprintf( + "ref/store: incorrect old object id: got %s, expected %s", + err.Actual, err.Expected, + ) +} + +// WrongOldTargetError indicates that a symbolic operation's expected old target +// did not match the current reference target. +type WrongOldTargetError struct { + Actual string + Expected string +} + +// Error implements error. +func (err *WrongOldTargetError) Error() string { + return fmt.Sprintf( + "ref/store: incorrect old target: got %q, expected %q", + err.Actual, err.Expected, + ) +} diff --git a/ref/store/memory/batch.go b/ref/store/memory/batch.go new file mode 100644 index 00000000..0326e4f5 --- /dev/null +++ b/ref/store/memory/batch.go @@ -0,0 +1,206 @@ +package memory + +import ( + "lindenii.org/go/furgit/object/id" + "lindenii.org/go/furgit/ref/store" +) + +// Batch stages in-memory updates for one subset commit. +type Batch struct { + store *Memory + ops []queuedUpdate +} + +var _ store.Batch = (*Batch)(nil) + +// BeginBatch creates one new in-memory batch. +func (memory *Memory) BeginBatch() (store.Batch, error) { + return &Batch{ + store: memory, + ops: make([]queuedUpdate, 0, 8), + }, nil +} + +// Create queues a direct reference creation. 
+func (batch *Batch) Create(name string, newID id.ObjectID) error { + return batch.queue(queuedUpdate{name: name, kind: updateCreate, newID: newID}) +} + +// Update queues a direct reference update. +func (batch *Batch) Update(name string, newID, oldID id.ObjectID) error { + return batch.queue(queuedUpdate{name: name, kind: updateReplace, newID: newID, oldID: oldID}) +} + +// Delete queues a direct reference deletion. +func (batch *Batch) Delete(name string, oldID id.ObjectID) error { + return batch.queue(queuedUpdate{name: name, kind: updateDelete, oldID: oldID}) +} + +// Verify queues a direct reference verification. +func (batch *Batch) Verify(name string, oldID id.ObjectID) error { + return batch.queue(queuedUpdate{name: name, kind: updateVerify, oldID: oldID}) +} + +// CreateSymbolic queues a symbolic reference creation. +func (batch *Batch) CreateSymbolic(name, newTarget string) error { + return batch.queue(queuedUpdate{name: name, kind: updateCreateSymbolic, newTarget: newTarget}) +} + +// UpdateSymbolic queues a symbolic reference update. +func (batch *Batch) UpdateSymbolic(name, newTarget, oldTarget string) error { + return batch.queue(queuedUpdate{name: name, kind: updateReplaceSymbolic, newTarget: newTarget, oldTarget: oldTarget}) +} + +// DeleteSymbolic queues a symbolic reference deletion. +func (batch *Batch) DeleteSymbolic(name, oldTarget string) error { + return batch.queue(queuedUpdate{name: name, kind: updateDeleteSymbolic, oldTarget: oldTarget}) +} + +// VerifySymbolic queues a symbolic reference verification. +func (batch *Batch) VerifySymbolic(name, oldTarget string) error { + return batch.queue(queuedUpdate{name: name, kind: updateVerifySymbolic, oldTarget: oldTarget}) +} + +// Apply validates queued operations, +// drops rejected operations, +// and applies the remaining compatible set. +// Concurrent readers observe either the pre-Apply state +// or the post-Apply state. 
+func (batch *Batch) Apply() ([]store.BatchResult, error) { + results := make([]store.BatchResult, len(batch.ops)) + remainingIdx := make([]int, 0, len(batch.ops)) + remainingOps := make([]queuedUpdate, 0, len(batch.ops)) + seenTargets := make(map[string]struct{}, len(batch.ops)) + + batch.store.mu.Lock() + defer batch.store.mu.Unlock() + + for i, op := range batch.ops { + results[i].Name = op.name + + target, err := resolveQueuedUpdateTarget(batch.store.refs, op) + if err != nil { + if isBatchRejected(err) { + results[i].Status = store.BatchStatusRejected + results[i].Error = err + + continue + } + + markFatal(results, batch.ops, i, err) + + return results, err + } + + if _, exists := seenTargets[target.name]; exists { + results[i].Status = store.BatchStatusRejected + results[i].Error = store.ErrDuplicateUpdate + + continue + } + + seenTargets[target.name] = struct{}{} + + remainingIdx = append(remainingIdx, i) + remainingOps = append(remainingOps, op) + } + + return batch.applyRemaining(results, remainingIdx, remainingOps) +} + +// Abort abandons the batch. +func (batch *Batch) Abort() error { + return nil +} + +// applyRemaining repeatedly prepares the remaining operations, +// dropping one rejected operation per round, +// until either the whole set applies cleanly or a fatal failure occurs. 
+func (batch *Batch) applyRemaining(results []store.BatchResult, remainingIdx []int, remainingOps []queuedUpdate) ([]store.BatchResult, error) { + for len(remainingOps) > 0 { + prepared, failedName, err := prepareUpdates(batch.store.refs, remainingOps) + if err == nil { + next := cloneRefs(batch.store.refs) + applyPreparedUpdates(next, prepared) + batch.store.refs = next + + for _, idx := range remainingIdx { + results[idx].Status = store.BatchStatusApplied + } + + return results, nil + } + + if !isBatchRejected(err) { + markFatalRemaining(results, remainingIdx, remainingOps, failedName, err) + + return results, err + } + + rejectedAt := indexOfName(remainingOps, failedName) + if rejectedAt < 0 { + for _, idx := range remainingIdx { + results[idx].Status = store.BatchStatusNotAttempted + results[idx].Error = err + } + + return results, err + } + + results[remainingIdx[rejectedAt]].Status = store.BatchStatusRejected + results[remainingIdx[rejectedAt]].Error = err + remainingIdx = append(remainingIdx[:rejectedAt], remainingIdx[rejectedAt+1:]...) + remainingOps = append(remainingOps[:rejectedAt], remainingOps[rejectedAt+1:]...) 
+ } + + return results, nil +} + +func (batch *Batch) queue(op queuedUpdate) error { + err := validateQueuedUpdate(batch.store.objectFormat, op) + if err != nil { + return err + } + + batch.ops = append(batch.ops, op) + + return nil +} + +func markFatal(results []store.BatchResult, ops []queuedUpdate, at int, err error) { + results[at].Status = store.BatchStatusFatal + results[at].Error = err + + for j := at + 1; j < len(results); j++ { + results[j].Name = ops[j].name + results[j].Status = store.BatchStatusNotAttempted + results[j].Error = err + } +} + +func markFatalRemaining(results []store.BatchResult, remainingIdx []int, remainingOps []queuedUpdate, failedName string, err error) { + fatalMarked := false + + for i, idx := range remainingIdx { + if !fatalMarked && failedName != "" && remainingOps[i].name == failedName { + results[idx].Status = store.BatchStatusFatal + results[idx].Error = err + fatalMarked = true + + continue + } + + results[idx].Status = store.BatchStatusNotAttempted + results[idx].Error = err + } +} + +func indexOfName(ops []queuedUpdate, name string) int { + for i, op := range ops { + if op.name == name { + return i + } + } + + return -1 +} diff --git a/ref/store/memory/batch_test.go b/ref/store/memory/batch_test.go new file mode 100644 index 00000000..518dc7b9 --- /dev/null +++ b/ref/store/memory/batch_test.go @@ -0,0 +1,115 @@ +package memory_test + +import ( + "errors" + "testing" + + "lindenii.org/go/furgit/object/id" + "lindenii.org/go/furgit/ref" + "lindenii.org/go/furgit/ref/store" + "lindenii.org/go/furgit/ref/store/memory" +) + +func TestBatchRejectsDuplicateResolvedTargetAndAppliesRemainder(t *testing.T) { + t.Parallel() + + for _, objectFormat := range id.SupportedObjectFormats() { + t.Run(objectFormat.String(), func(t *testing.T) { + t.Parallel() + + m := memory.New(objectFormat) + mainID := objectFormat.Sum([]byte("main")) + devID := objectFormat.Sum([]byte("dev")) + nextMainID := objectFormat.Sum([]byte("next-main")) + nextDevID 
:= objectFormat.Sum([]byte("next-dev")) + aliasID := objectFormat.Sum([]byte("alias")) + + seed(t, m, func(tx store.Transaction) { + err := tx.Create("refs/heads/main", mainID) + if err != nil { + t.Fatalf("Create(main): %v", err) + } + + err = tx.Create("refs/heads/dev", devID) + if err != nil { + t.Fatalf("Create(dev): %v", err) + } + + err = tx.CreateSymbolic("refs/heads/alias", "refs/heads/main") + if err != nil { + t.Fatalf("CreateSymbolic(alias): %v", err) + } + }) + + batch, err := m.BeginBatch() + if err != nil { + t.Fatalf("BeginBatch: %v", err) + } + + err = batch.Update("refs/heads/main", nextMainID, mainID) + if err != nil { + t.Fatalf("Update(main): %v", err) + } + + // Updates the symbolic alias in deref mode, + // which resolves to refs/heads/main + // and therefore duplicates the first operation. + err = batch.Update("refs/heads/alias", aliasID, mainID) + if err != nil { + t.Fatalf("Update(alias): %v", err) + } + + err = batch.Update("refs/heads/dev", nextDevID, devID) + if err != nil { + t.Fatalf("Update(dev): %v", err) + } + + results, err := batch.Apply() + if err != nil { + t.Fatalf("Apply: %v", err) + } + + if len(results) != 3 { + t.Fatalf("len(results) = %d, want 3", len(results)) + } + + if results[0].Status != store.BatchStatusApplied { + t.Fatalf("results[0].Status = %v, want applied", results[0].Status) + } + + if results[1].Status != store.BatchStatusRejected { + t.Fatalf("results[1].Status = %v, want rejected", results[1].Status) + } + + if !errors.Is(results[1].Error, store.ErrDuplicateUpdate) { + t.Fatalf("results[1].Error = %v, want ErrDuplicateUpdate", results[1].Error) + } + + if results[2].Status != store.BatchStatusApplied { + t.Fatalf("results[2].Status = %v, want applied", results[2].Status) + } + + if got := resolveDirect(t, m, "refs/heads/main").ID; got != nextMainID { + t.Fatalf("main after batch = %v, want %v", got, nextMainID) + } + + if got := resolveDirect(t, m, "refs/heads/dev").ID; got != nextDevID { + t.Fatalf("dev 
after batch = %v, want %v", got, nextDevID) + } + + resolved, err := m.Resolve("refs/heads/alias") + if err != nil { + t.Fatalf("Resolve(alias): %v", err) + } + + symbolic, ok := resolved.(ref.Symbolic) + if !ok { + t.Fatalf("Resolve(alias) = %T, want ref.Symbolic", resolved) + } + + if symbolic.Target != "refs/heads/main" { + t.Fatalf("alias target = %q, want refs/heads/main", symbolic.Target) + } + }) + } +} diff --git a/ref/store/memory/doc.go b/ref/store/memory/doc.go new file mode 100644 index 00000000..37a829b0 --- /dev/null +++ b/ref/store/memory/doc.go @@ -0,0 +1,2 @@ +// Package memory provides one in-memory reference store. +package memory diff --git a/ref/store/memory/helpers_test.go b/ref/store/memory/helpers_test.go new file mode 100644 index 00000000..a7973f13 --- /dev/null +++ b/ref/store/memory/helpers_test.go @@ -0,0 +1,46 @@ +package memory_test + +import ( + "testing" + + "lindenii.org/go/furgit/ref" + "lindenii.org/go/furgit/ref/store" + "lindenii.org/go/furgit/ref/store/memory" +) + +// resolveDirect resolves name and asserts that it is a direct reference. +// +// Unlike Memory.ResolveToDirect, it does not follow symbolic references. +func resolveDirect(t *testing.T, memory *memory.Memory, name string) ref.Direct { + t.Helper() + + resolved, err := memory.Resolve(name) + if err != nil { + t.Fatalf("Resolve(%q): %v", name, err) + } + + direct, ok := resolved.(ref.Direct) + if !ok { + t.Fatalf("Resolve(%q) = %T, want ref.Direct", name, resolved) + } + + return direct +} + +// seed runs fn against a fresh transaction and commits it, +// failing the test on any error. 
+func seed(t *testing.T, memory *memory.Memory, fn func(tx store.Transaction)) { + t.Helper() + + tx, err := memory.BeginTransaction() + if err != nil { + t.Fatalf("BeginTransaction: %v", err) + } + + fn(tx) + + err = tx.Commit() + if err != nil { + t.Fatalf("Commit: %v", err) + } +} diff --git a/ref/store/memory/memory.go b/ref/store/memory/memory.go new file mode 100644 index 00000000..3c8f4968 --- /dev/null +++ b/ref/store/memory/memory.go @@ -0,0 +1,43 @@ +package memory + +import ( + "sync" + + "lindenii.org/go/furgit/object/id" + "lindenii.org/go/furgit/ref/store" +) + +// Memory reads and writes one in-memory Git reference namespace. +// +// Labels: Close-Caller. +type Memory struct { + mu sync.RWMutex //exhaustruct:optional + objectFormat id.ObjectFormat + refs map[string]storedRef +} + +var ( + _ store.Reader = (*Memory)(nil) + _ store.Transactioner = (*Memory)(nil) + _ store.Batcher = (*Memory)(nil) +) + +// New builds one empty in-memory reference store for one object format. +func New(objectFormat id.ObjectFormat) *Memory { + return &Memory{ + objectFormat: objectFormat, + refs: make(map[string]storedRef), + } +} + +// ObjectFormat returns the object format used by the store. +func (memory *Memory) ObjectFormat() id.ObjectFormat { + return memory.objectFormat +} + +// Close closes the in-memory reference store. +// +// Labels: MT-Unsafe. +func (memory *Memory) Close() error { + return nil +} diff --git a/ref/store/memory/read.go b/ref/store/memory/read.go new file mode 100644 index 00000000..540b7576 --- /dev/null +++ b/ref/store/memory/read.go @@ -0,0 +1,78 @@ +package memory + +import ( + "fmt" + + "lindenii.org/go/furgit/ref" + "lindenii.org/go/furgit/ref/store" +) + +// Resolve resolves one reference name from the in-memory namespace. 
+func (memory *Memory) Resolve(name string) (ref.Ref, error) { + memory.mu.RLock() + defer memory.mu.RUnlock() + + return publicRef(name, memory.refs[name]) +} + +// ResolveToDirect resolves symbolic references +// until one direct reference is reached. +func (memory *Memory) ResolveToDirect(name string) (ref.Direct, error) { + memory.mu.RLock() + defer memory.mu.RUnlock() + + return memory.resolveToDirectLocked(name) +} + +func (memory *Memory) resolveToDirectLocked(name string) (ref.Direct, error) { + cur := name + seen := make(map[string]struct{}) + + for { + if _, ok := seen[cur]; ok { + return ref.Direct{}, fmt.Errorf("%w: at %q", store.ErrSymbolicCycle, cur) + } + + seen[cur] = struct{}{} + + resolved, err := publicRef(cur, memory.refs[cur]) + if err != nil { + return ref.Direct{}, err + } + + switch resolved := resolved.(type) { + case ref.Direct: + return resolved, nil + case ref.Symbolic: + if resolved.Target == "" { + return ref.Direct{}, fmt.Errorf( + "%w: symbolic reference %q has empty target", + store.ErrInvalidValue, resolved.Name(), + ) + } + + cur = resolved.Target + default: + panic(fmt.Sprintf("ref/store/memory: unsupported reference type %T", resolved)) + } + } +} + +func publicRef(name string, stored storedRef) (ref.Ref, error) { + switch stored.kind { + case storedDirect: + direct := ref.Direct{RefName: name, ID: stored.id, Peeled: nil} + if stored.peeled != nil { + peeled := *stored.peeled + direct.Peeled = &peeled + } + + return direct, nil + case storedSymbolic: + return ref.Symbolic{RefName: name, Target: stored.target}, nil + case storedMissing: + return nil, store.ErrReferenceNotFound + default: + panic(fmt.Sprintf("ref/store/memory: unsupported stored reference kind %d", stored.kind)) + } +} diff --git a/ref/store/memory/read_test.go b/ref/store/memory/read_test.go new file mode 100644 index 00000000..5c082794 --- /dev/null +++ b/ref/store/memory/read_test.go @@ -0,0 +1,80 @@ +package memory_test + +import ( + "errors" + "testing" + + 
"lindenii.org/go/furgit/object/id" + "lindenii.org/go/furgit/ref" + "lindenii.org/go/furgit/ref/store" + "lindenii.org/go/furgit/ref/store/memory" +) + +func TestResolveSymbolic(t *testing.T) { + t.Parallel() + + for _, objectFormat := range id.SupportedObjectFormats() { + t.Run(objectFormat.String(), func(t *testing.T) { + t.Parallel() + + m := memory.New(objectFormat) + mainID := objectFormat.Sum([]byte("main")) + + seed(t, m, func(tx store.Transaction) { + err := tx.Create("refs/heads/main", mainID) + if err != nil { + t.Fatalf("Create(main): %v", err) + } + + err = tx.CreateSymbolic("HEAD", "refs/heads/main") + if err != nil { + t.Fatalf("CreateSymbolic(HEAD): %v", err) + } + }) + + head, err := m.Resolve("HEAD") + if err != nil { + t.Fatalf("Resolve(HEAD): %v", err) + } + + symbolic, ok := head.(ref.Symbolic) + if !ok { + t.Fatalf("Resolve(HEAD) = %T, want ref.Symbolic", head) + } + + if symbolic.Target != "refs/heads/main" { + t.Fatalf("HEAD target = %q, want refs/heads/main", symbolic.Target) + } + + direct, err := m.ResolveToDirect("HEAD") + if err != nil { + t.Fatalf("ResolveToDirect(HEAD): %v", err) + } + + if direct.ID != mainID { + t.Fatalf("ResolveToDirect(HEAD) ID = %v, want %v", direct.ID, mainID) + } + }) + } +} + +func TestResolveMissing(t *testing.T) { + t.Parallel() + + for _, objectFormat := range id.SupportedObjectFormats() { + t.Run(objectFormat.String(), func(t *testing.T) { + t.Parallel() + + m := memory.New(objectFormat) + + _, err := m.Resolve("refs/heads/absent") + if err == nil { + t.Fatalf("Resolve(absent) succeeded, want ErrReferenceNotFound") + } + + if !errors.Is(err, store.ErrReferenceNotFound) { + t.Fatalf("Resolve(absent) err = %v, want ErrReferenceNotFound", err) + } + }) + } +} diff --git a/ref/store/memory/ref.go b/ref/store/memory/ref.go new file mode 100644 index 00000000..1286c358 --- /dev/null +++ b/ref/store/memory/ref.go @@ -0,0 +1,43 @@ +package memory + +import "lindenii.org/go/furgit/object/id" + +// storedRef is the 
internal representation of one reference. +// +// Unlike the public ref values, +// it carries no name of its own; +// the name is the map key. +type storedRef struct { + kind storedKind + id id.ObjectID //exhaustruct:optional + target string //exhaustruct:optional + peeled *id.ObjectID //exhaustruct:optional +} + +type storedKind uint8 + +const ( + storedMissing storedKind = iota + storedDirect + storedSymbolic +) + +func cloneStoredRef(stored storedRef) storedRef { + if stored.peeled == nil { + return stored + } + + peeled := *stored.peeled + stored.peeled = &peeled + + return stored +} + +func cloneRefs(refs map[string]storedRef) map[string]storedRef { + cloned := make(map[string]storedRef, len(refs)) + for name, stored := range refs { + cloned[name] = cloneStoredRef(stored) + } + + return cloned +} diff --git a/ref/store/memory/transaction.go b/ref/store/memory/transaction.go new file mode 100644 index 00000000..e68f8cab --- /dev/null +++ b/ref/store/memory/transaction.go @@ -0,0 +1,95 @@ +package memory + +import ( + "lindenii.org/go/furgit/object/id" + "lindenii.org/go/furgit/ref/store" +) + +// Transaction stages in-memory updates for one atomic commit. +type Transaction struct { + store *Memory + ops []queuedUpdate +} + +var _ store.Transaction = (*Transaction)(nil) + +// BeginTransaction creates one new in-memory transaction. +func (memory *Memory) BeginTransaction() (store.Transaction, error) { + return &Transaction{ + store: memory, + ops: make([]queuedUpdate, 0, 8), + }, nil +} + +// Create queues a direct reference creation. +func (tx *Transaction) Create(name string, newID id.ObjectID) error { + return tx.queue(queuedUpdate{name: name, kind: updateCreate, newID: newID}) +} + +// Update queues a direct reference update. +func (tx *Transaction) Update(name string, newID, oldID id.ObjectID) error { + return tx.queue(queuedUpdate{name: name, kind: updateReplace, newID: newID, oldID: oldID}) +} + +// Delete queues a direct reference deletion. 
+func (tx *Transaction) Delete(name string, oldID id.ObjectID) error { + return tx.queue(queuedUpdate{name: name, kind: updateDelete, oldID: oldID}) +} + +// Verify queues a direct reference verification. +func (tx *Transaction) Verify(name string, oldID id.ObjectID) error { + return tx.queue(queuedUpdate{name: name, kind: updateVerify, oldID: oldID}) +} + +// CreateSymbolic queues a symbolic reference creation. +func (tx *Transaction) CreateSymbolic(name, newTarget string) error { + return tx.queue(queuedUpdate{name: name, kind: updateCreateSymbolic, newTarget: newTarget}) +} + +// UpdateSymbolic queues a symbolic reference update. +func (tx *Transaction) UpdateSymbolic(name, newTarget, oldTarget string) error { + return tx.queue(queuedUpdate{name: name, kind: updateReplaceSymbolic, newTarget: newTarget, oldTarget: oldTarget}) +} + +// DeleteSymbolic queues a symbolic reference deletion. +func (tx *Transaction) DeleteSymbolic(name, oldTarget string) error { + return tx.queue(queuedUpdate{name: name, kind: updateDeleteSymbolic, oldTarget: oldTarget}) +} + +// VerifySymbolic queues a symbolic reference verification. +func (tx *Transaction) VerifySymbolic(name, oldTarget string) error { + return tx.queue(queuedUpdate{name: name, kind: updateVerifySymbolic, oldTarget: oldTarget}) +} + +// Commit validates and applies the queued updates atomically. +func (tx *Transaction) Commit() error { + tx.store.mu.Lock() + defer tx.store.mu.Unlock() + + prepared, _, err := prepareUpdates(tx.store.refs, tx.ops) + if err != nil { + return err + } + + next := cloneRefs(tx.store.refs) + applyPreparedUpdates(next, prepared) + tx.store.refs = next + + return nil +} + +// Abort abandons the transaction. 
+func (tx *Transaction) Abort() error { + return nil +} + +func (tx *Transaction) queue(op queuedUpdate) error { + err := validateQueuedUpdate(tx.store.objectFormat, op) + if err != nil { + return err + } + + tx.ops = append(tx.ops, op) + + return nil +} diff --git a/ref/store/memory/transaction_test.go b/ref/store/memory/transaction_test.go new file mode 100644 index 00000000..75ac3f88 --- /dev/null +++ b/ref/store/memory/transaction_test.go @@ -0,0 +1,96 @@ +package memory_test + +import ( + "errors" + "testing" + + "lindenii.org/go/furgit/object/id" + "lindenii.org/go/furgit/ref/store" + "lindenii.org/go/furgit/ref/store/memory" +) + +func TestTransactionRejectLeavesStoreUnchanged(t *testing.T) { + t.Parallel() + + for _, objectFormat := range id.SupportedObjectFormats() { + t.Run(objectFormat.String(), func(t *testing.T) { + t.Parallel() + + m := memory.New(objectFormat) + mainID := objectFormat.Sum([]byte("main")) + devID := objectFormat.Sum([]byte("dev")) + nextID := objectFormat.Sum([]byte("next")) + wrongOld := objectFormat.Sum([]byte("wrong")) + + seed(t, m, func(tx store.Transaction) { + err := tx.Create("refs/heads/main", mainID) + if err != nil { + t.Fatalf("Create(main): %v", err) + } + + err = tx.Create("refs/heads/dev", devID) + if err != nil { + t.Fatalf("Create(dev): %v", err) + } + }) + + tx, err := m.BeginTransaction() + if err != nil { + t.Fatalf("BeginTransaction: %v", err) + } + + err = tx.Update("refs/heads/main", nextID, mainID) + if err != nil { + t.Fatalf("Update(main): %v", err) + } + + err = tx.Update("refs/heads/dev", nextID, wrongOld) + if err != nil { + t.Fatalf("Update(dev): %v", err) + } + + err = tx.Commit() + if err == nil { + t.Fatalf("Commit succeeded, want WrongOldIDError") + } + + if _, ok := errors.AsType[*store.WrongOldIDError](err); !ok { + t.Fatalf("Commit error = %T %v, want *store.WrongOldIDError", err, err) + } + + if got := resolveDirect(t, m, "refs/heads/main").ID; got != mainID { + t.Fatalf("main after rejected 
transaction = %v, want %v", got, mainID) + } + + if got := resolveDirect(t, m, "refs/heads/dev").ID; got != devID { + t.Fatalf("dev after rejected transaction = %v, want %v", got, devID) + } + }) + } +} + +func TestTransactionRejectsForeignObjectFormat(t *testing.T) { + t.Parallel() + + for _, objectFormat := range id.SupportedObjectFormats() { + t.Run(objectFormat.String(), func(t *testing.T) { + t.Parallel() + + m := memory.New(objectFormat) + + tx, err := m.BeginTransaction() + if err != nil { + t.Fatalf("BeginTransaction: %v", err) + } + + err = tx.Create("refs/heads/main", id.ObjectID{}) + if err == nil { + t.Fatalf("Create with unset ID succeeded, want ErrInvalidValue") + } + + if !errors.Is(err, store.ErrInvalidValue) { + t.Fatalf("Create error = %v, want ErrInvalidValue", err) + } + }) + } +} diff --git a/ref/store/memory/update.go b/ref/store/memory/update.go new file mode 100644 index 00000000..8e8c6e30 --- /dev/null +++ b/ref/store/memory/update.go @@ -0,0 +1,372 @@ +package memory + +import ( + "errors" + "fmt" + + "lindenii.org/go/furgit/object/id" + refname "lindenii.org/go/furgit/ref/name" + "lindenii.org/go/furgit/ref/store" +) + +type updateKind uint8 + +const ( + updateCreate updateKind = iota + updateReplace + updateDelete + updateVerify + updateCreateSymbolic + updateReplaceSymbolic + updateDeleteSymbolic + updateVerifySymbolic +) + +type queuedUpdate struct { + name string + kind updateKind + newID id.ObjectID //exhaustruct:optional + oldID id.ObjectID //exhaustruct:optional + newTarget string //exhaustruct:optional + oldTarget string //exhaustruct:optional +} + +type resolvedUpdateTarget struct { + name string + ref storedRef +} + +type preparedUpdate struct { + op queuedUpdate + target resolvedUpdateTarget +} + +// validateQueuedUpdate checks one operation at queue time, +// rejecting malformed names and values +// before they can enter a transaction or batch. 
+func validateQueuedUpdate(objectFormat id.ObjectFormat, op queuedUpdate) error { + switch op.kind { + case updateCreate, updateReplace: + err := refname.ValidateUpdateName(op.name, true) + if err != nil { + return fmt.Errorf("ref/store/memory: %w", err) + } + + if op.newID.ObjectFormat() != objectFormat { + return fmt.Errorf("%w: object id format mismatch", store.ErrInvalidValue) + } + case updateDelete, updateVerify: + err := refname.ValidateUpdateName(op.name, false) + if err != nil { + return fmt.Errorf("ref/store/memory: %w", err) + } + + if op.oldID.ObjectFormat() != objectFormat { + return fmt.Errorf("%w: object id format mismatch", store.ErrInvalidValue) + } + case updateCreateSymbolic, updateReplaceSymbolic: + err := refname.ValidateUpdateName(op.name, true) + if err != nil { + return fmt.Errorf("ref/store/memory: %w", err) + } + + if op.newTarget == "" { + return fmt.Errorf("%w: empty symbolic target", store.ErrInvalidValue) + } + + err = refname.ValidateSymbolicTarget(op.name, op.newTarget) + if err != nil { + return fmt.Errorf("ref/store/memory: %w", err) + } + case updateDeleteSymbolic, updateVerifySymbolic: + err := refname.ValidateUpdateName(op.name, false) + if err != nil { + return fmt.Errorf("ref/store/memory: %w", err) + } + default: + panic(fmt.Sprintf("ref/store/memory: unsupported update operation %d", op.kind)) + } + + if op.kind == updateReplaceSymbolic || op.kind == updateDeleteSymbolic || op.kind == updateVerifySymbolic { + if op.oldTarget == "" { + return fmt.Errorf("%w: empty symbolic old target", store.ErrInvalidValue) + } + } + + return nil +} + +// prepareUpdates resolves, conflict-checks, and verifies a queued operation +// set against refs without mutating it. +// On failure it returns the name of the offending operation alongside the error. 
+func prepareUpdates(refs map[string]storedRef, ops []queuedUpdate) ([]preparedUpdate, string, error) { + prepared, name, err := resolvePreparedUpdates(refs, ops) + if err != nil { + return prepared, name, err + } + + deleted, written := collectPreparedWrites(prepared) + existing := collectVisibleNames(refs) + + for _, name := range written { + err = verifyRefnameAvailable(name, existing, written, deleted) + if err != nil { + return prepared, name, err + } + } + + name, err = verifyPreparedUpdates(refs, prepared) + if err != nil { + return prepared, name, err + } + + return prepared, "", nil +} + +func resolvePreparedUpdates(refs map[string]storedRef, ops []queuedUpdate) ([]preparedUpdate, string, error) { + prepared := make([]preparedUpdate, 0, len(ops)) + targets := make(map[string]struct{}, len(ops)) + + for _, op := range ops { + target, err := resolveQueuedUpdateTarget(refs, op) + if err != nil { + return prepared, op.name, err + } + + if _, exists := targets[target.name]; exists { + return prepared, op.name, store.ErrDuplicateUpdate + } + + targets[target.name] = struct{}{} + prepared = append(prepared, preparedUpdate{op: op, target: target}) + } + + return prepared, "", nil +} + +func resolveQueuedUpdateTarget(refs map[string]storedRef, op queuedUpdate) (resolvedUpdateTarget, error) { + switch op.kind { + case updateCreate: + return resolveOrdinaryTarget(refs, op.name, true) + case updateReplace, updateDelete, updateVerify: + return resolveOrdinaryTarget(refs, op.name, false) + case updateCreateSymbolic, updateReplaceSymbolic, updateDeleteSymbolic, updateVerifySymbolic: + return resolvedUpdateTarget{name: op.name, ref: directRead(refs, op.name)}, nil + default: + panic(fmt.Sprintf("ref/store/memory: unsupported update operation %d", op.kind)) + } +} + +func resolveOrdinaryTarget(refs map[string]storedRef, name string, allowMissing bool) (resolvedUpdateTarget, error) { + cur := name + seen := make(map[string]struct{}) + + for { + if _, ok := seen[cur]; ok { + 
return resolvedUpdateTarget{}, fmt.Errorf("%w: at %q", store.ErrSymbolicCycle, cur) + } + + seen[cur] = struct{}{} + + refState := directRead(refs, cur) + switch refState.kind { + case storedMissing: + if !allowMissing { + return resolvedUpdateTarget{}, store.ErrReferenceNotFound + } + + return resolvedUpdateTarget{name: cur, ref: refState}, nil + case storedDirect: + return resolvedUpdateTarget{name: cur, ref: refState}, nil + case storedSymbolic: + if refState.target == "" { + return resolvedUpdateTarget{}, fmt.Errorf( + "%w: symbolic reference has empty target", store.ErrInvalidValue, + ) + } + + cur = refState.target + default: + panic(fmt.Sprintf("ref/store/memory: unsupported stored reference kind %d", refState.kind)) + } + } +} + +func directRead(refs map[string]storedRef, name string) storedRef { + stored, ok := refs[name] + if !ok { + return storedRef{kind: storedMissing} + } + + return cloneStoredRef(stored) +} + +func collectPreparedWrites(prepared []preparedUpdate) (deleted map[string]struct{}, written []string) { + deleted = make(map[string]struct{}) + written = make([]string, 0, len(prepared)) + + for _, item := range prepared { + switch item.op.kind { + case updateDelete, updateDeleteSymbolic: + deleted[item.target.name] = struct{}{} + case updateCreate, updateReplace, updateCreateSymbolic, updateReplaceSymbolic: + written = append(written, item.target.name) + case updateVerify, updateVerifySymbolic: + default: + panic(fmt.Sprintf("ref/store/memory: unsupported update operation %d", item.op.kind)) + } + } + + return deleted, written +} + +func collectVisibleNames(refs map[string]storedRef) map[string]struct{} { + names := make(map[string]struct{}, len(refs)) + for name := range refs { + names[name] = struct{}{} + } + + return names +} + +func verifyRefnameAvailable(name string, existing map[string]struct{}, writes []string, deleted map[string]struct{}) error { + for existingName := range existing { + if existingName == name { + continue + } + + if _, 
skip := deleted[existingName]; skip { + continue + } + + if refnamesConflict(name, existingName) { + return &store.NameConflictError{Other: existingName} + } + } + + for _, other := range writes { + if other == name { + continue + } + + if refnamesConflict(name, other) { + return &store.NameConflictError{Other: other} + } + } + + return nil +} + +func refnamesConflict(left, right string) bool { + return left == right || + hasPathPrefix(left, right) || + hasPathPrefix(right, left) +} + +func hasPathPrefix(name, prefix string) bool { + return len(name) > len(prefix) && + name[len(prefix)] == '/' && + name[:len(prefix)] == prefix +} + +func verifyPreparedUpdates(refs map[string]storedRef, prepared []preparedUpdate) (string, error) { + for i := range prepared { + item := &prepared[i] + item.target.ref = directRead(refs, item.target.name) + + err := verifyPreparedUpdateCurrent(*item) + if err != nil { + return item.op.name, err + } + } + + return "", nil +} + +func verifyPreparedUpdateCurrent(item preparedUpdate) error { + switch item.op.kind { + case updateCreate, updateCreateSymbolic: + if item.target.ref.kind != storedMissing { + return store.ErrCreateExists + } + + return nil + case updateReplace, updateDelete, updateVerify: + if item.target.ref.kind == storedMissing { + return store.ErrReferenceNotFound + } + + if item.target.ref.kind != storedDirect { + return store.ErrExpectedDirect + } + + if item.target.ref.id != item.op.oldID { + return &store.WrongOldIDError{Actual: item.target.ref.id, Expected: item.op.oldID} + } + + return nil + case updateReplaceSymbolic, updateDeleteSymbolic, updateVerifySymbolic: + if item.target.ref.kind == storedMissing { + return store.ErrReferenceNotFound + } + + if item.target.ref.kind != storedSymbolic { + return store.ErrExpectedSymbolic + } + + if item.target.ref.target != item.op.oldTarget { + return &store.WrongOldTargetError{Actual: item.target.ref.target, Expected: item.op.oldTarget} + } + + return nil + default: + 
panic(fmt.Sprintf("ref/store/memory: unsupported update operation %d", item.op.kind)) + } +} + +func applyPreparedUpdates(refs map[string]storedRef, prepared []preparedUpdate) { + for _, item := range prepared { + switch item.op.kind { + case updateCreate, updateReplace: + refs[item.target.name] = storedRef{kind: storedDirect, id: item.op.newID} + case updateCreateSymbolic, updateReplaceSymbolic: + refs[item.target.name] = storedRef{kind: storedSymbolic, target: item.op.newTarget} + case updateDelete, updateDeleteSymbolic: + delete(refs, item.target.name) + case updateVerify, updateVerifySymbolic: + default: + panic(fmt.Sprintf("ref/store/memory: unsupported update operation %d", item.op.kind)) + } + } +} + +// isBatchRejected reports whether err is a per-operation rejection +// that should drop only the offending operation, +// rather than a fatal failure that aborts the whole batch. +func isBatchRejected(err error) bool { + switch { + case errors.Is(err, store.ErrReferenceNotFound), + errors.Is(err, store.ErrCreateExists), + errors.Is(err, store.ErrDuplicateUpdate), + errors.Is(err, store.ErrExpectedDirect), + errors.Is(err, store.ErrExpectedSymbolic), + errors.Is(err, store.ErrInvalidValue), + errors.Is(err, store.ErrSymbolicCycle), + errors.Is(err, refname.ErrInvalidName): + return true + } + + if _, ok := errors.AsType[*store.NameConflictError](err); ok { + return true + } + + if _, ok := errors.AsType[*store.WrongOldIDError](err); ok { + return true + } + + if _, ok := errors.AsType[*store.WrongOldTargetError](err); ok { + return true + } + + return false +} diff --git a/ref/store/reading.go b/ref/store/reading.go new file mode 100644 index 00000000..edb8a20e --- /dev/null +++ b/ref/store/reading.go @@ -0,0 +1,34 @@ +package store + +import "lindenii.org/go/furgit/ref" + +// Reader reads Git references. +// +// Labels: MT-Safe. +type Reader interface { + // Resolve resolves a reference name + // to either a symbolic or direct ref. 
+ // + // Implementations return value forms + // ([ref.Direct] or [ref.Symbolic]), + // not pointer forms. + // If the reference does not exist, + // implementations return [ErrReferenceNotFound]. + // + // Labels: Life-Parent. + Resolve(name string) (ref.Ref, error) + + // ResolveToDirect resolves a reference name to a direct reference, + // following symbolic references until one is reached. + // + // It follows symbolic references only; + // it does not peel annotated tag objects. + // + // Implementations may follow symbolic hops with backend-local lookup. + // Callers that need cross-backend symbolic resolution + // (for example across a chain of stores) + // should prefer repeatedly calling Resolve. + // + // Labels: Life-Parent. + ResolveToDirect(name string) (ref.Direct, error) +} diff --git a/ref/store/transaction.go b/ref/store/transaction.go new file mode 100644 index 00000000..1f61551a --- /dev/null +++ b/ref/store/transaction.go @@ -0,0 +1,57 @@ +package store + +import "lindenii.org/go/furgit/object/id" + +// Transaction stages reference updates for one atomic commit. +// +// Ordinary methods operate in dereference mode: +// if name resolves to a symbolic ref, +// the operation applies to the final referent +// rather than to the symbolic ref itself. +// +// Symbolic methods operate on the named reference directly, +// without dereferencing symbolic refs. +// +// Labels: MT-Unsafe. +type Transaction interface { + // Create creates one direct reference, + // requiring that the logical reference does not already exist. + Create(name string, newID id.ObjectID) error + + // Update updates one direct reference, + // requiring that the current logical reference value matches oldID. + Update(name string, newID, oldID id.ObjectID) error + + // Delete deletes one direct reference, + // requiring that the current logical reference value matches oldID. 
+ Delete(name string, oldID id.ObjectID) error + + // Verify verifies that the current logical reference value matches oldID. + Verify(name string, oldID id.ObjectID) error + + // CreateSymbolic creates one symbolic reference, + // requiring that the named reference does not already exist. + CreateSymbolic(name, newTarget string) error + + // UpdateSymbolic updates one symbolic reference directly, + // requiring that its current target matches oldTarget. + UpdateSymbolic(name, newTarget, oldTarget string) error + + // DeleteSymbolic deletes one symbolic reference directly, + // requiring that its current target matches oldTarget. + DeleteSymbolic(name, oldTarget string) error + + // VerifySymbolic verifies that the named symbolic reference + // currently points at oldTarget. + VerifySymbolic(name, oldTarget string) error + + // Commit validates and applies all queued operations atomically. + // + // Commit invalidates the receiver. + Commit() error + + // Abort abandons the transaction and releases any resources it holds. + // + // Abort invalidates the receiver. + Abort() error +} diff --git a/ref/store/transactional_store.go b/ref/store/transactional_store.go new file mode 100644 index 00000000..e8b46413 --- /dev/null +++ b/ref/store/transactional_store.go @@ -0,0 +1,13 @@ +package store + +// Transactioner begins atomic reference transactions. +// +// Implementations should only satisfy Transactioner +// when they can stage and commit reference updates +// atomically within that backend. +type Transactioner interface { + // BeginTransaction creates one new mutable transaction. + // + // Labels: Deps-Borrowed, Life-Parent. 
+ BeginTransaction() (Transaction, error) +} diff --git a/research/dynamic_packfiles.txt b/research/dynamic_packfiles.txt new file mode 100644 index 00000000..e4fe7e54 --- /dev/null +++ b/research/dynamic_packfiles.txt @@ -0,0 +1,179 @@ +dynamic packfiles to append objects + +gc/refcount process punches page-sized holes in them for pages fully +within the space of unwanted objects, after setting a tombstone mark + +holes are recorded in an index and re-used + +then, if desired, the repack process removes all the punched holes +and anything surrounding from unwanted objects that are slightly out +of the page boundary + +repack is not really git's repack algorithm, it's basically just +defragmentation. + +generational bloom filters + +idx design +========== + +so, let's first get our invariants and patterns clear. + +* fixed-length cryptographic object IDs +* essentially uniform key distribution +* exact lookup only, no range scans, no ordered iteration requirements +* reads are extremely important +* writes are mostly append-like +* deletes/tombstones may happen later but are secondary + +1st design +---------- + +* mutable front index +* immutable base index +* periodic merge/compaction into a new base generation + + + +upload-pack/send-pack/defrag +============================ + +take current pack, remove dead objects/holes, filter objects out, record +offsets and adjust ofs_deltas since they always go backwards, write the pack +back; then stream written pack to client. two-step necessary because pack +header includes object count; could have a custom new protocol that doesn't do +so. + +random chat log dump +==================== +<~runxiyu> ori: actually. i think my hashtable-ish .idx scheme doesn't work really well with e.g. "user provided us a small part of the hash" +<~runxiyu> and when using the Git CLI, abbreviated hashes are extremely common.... 
+<~runxiyu> not lik ei'd need them in a *forge* +<~runxiyu> but ugh +<~runxiyu> i guess i'm going with some sort of b-tree :(( +<~runxiyu> ~~maybe i should just port gefs to git~~ +<&ori> runxiyu: why not? you should be able to pick the pages based on the prefix and then scan, no? +<~rx> ori: i need to somehow munge the has to prevent page directory explosions +<~rx> the hash* +<~rx> e.g. siphash(objectid, secret) +<~rx> otherwise an attacker could give you 10M objects that start with 00000 and whatnot +<&ori> what's the worst case that would happen there, and is it exponentially worse than giving you 10M objects that start with anything? +<&ori> I'm thinking that you can't generate a case worse than 256/nobject extra table lookups, assuming one bit per fanout.. +<~runxiyu> ori: for extendible hashing, yes, definitely worse +<~runxiyu> the directory will expand a lot for no good reason +<&ori> yes, but you have 256 bits of hash +<&ori> how much is a lot worse? +<&ori> what's the worst an attacker can do, and how is the impact worse than uploading 10M giant objects? +<&ori> also, spotted a bag of kuai kuai keeping the cash register working today at a tea shop +<~runxiyu> waitt +<~runxiyu> hmmm + * runxiyu looks agagin if it's O(N) or O(2^N) +<~runxiyu> well +<~runxiyu> i think it should be a O(2^n) directory size when the attacker can control n bits prefix +<&ori> what's the 'n' here? 
+<~runxiyu> > can control n bits prefix +<&ori> yeah, you run out of prefix pretty quickly, though +<&ori> I'm not seeing how you could get an exponential blowup if you share pages +<&ori> may be missing something, though +<~runxiyu> hm +<&ori> oh, wait, I see +<&ori> no, wait +<~runxiyu> i think im confusing myself too to some extent but something doesn't feel right +<~runxiyu> urgh +<~runxiyu> okay, rethinking this +<~runxiyu> d is the global depth +<~runxiyu> diretory size is 2^d +<~runxiyu> B records per bucket +<~runxiyu> whatever happens inside the bucket idc, let's say it's a linked list +<~runxiyu> whatever happens inside the bucket idc, let's say it's an array* (linked lists suck) +<~runxiyu> l <= d +<~runxiyu> (l being the local depth of a bucket) +<~runxiyu> normal: d = log^2(N/B) +<&ori> ahh, I see. +<~runxiyu> N is the object count +<&ori> yes, so what if you binary searched the page directory, or made it multi-level +<~runxiyu> an attacker could grab a giant repo and find commonly-prefixed objects, they don't need to brute force their own +<~runxiyu> ori: remember we're trying to do something easy to add new objects into +<~runxiyu> how'd you do that with a binary search? +<~runxiyu> not sure what you mean by multi-level yet here +<~runxiyu> well, it could just turn into a b+tree... +<~runxiyu> hm +<&ori> multilevel -- you have pd[0] using bits 0..n +<~runxiyu> maybe an lmdb object store isn't too bad after all +<&ori> pd[0][1] using bits n...m +<&ori> etc +<&ori> and the reason I was a bit confused was that I had thought the directory was a trie +<&ori> rather than just an expanding top level directory +<~runxiyu> ah +<&ori> so, yeah, I was thinking you could make the page directory an actual trie +<~runxiyu> sigh +<~runxiyu> i guess abbreviated object IDs is something i can't really skip. 
+<~runxiyu> ori: ill look into radix trees and LSM trees too +<~runxiyu> well, you're basically suggesting a radix tree i guess +<~runxiyu> well actually +<~runxiyu> radix might not necessarily be the best trie here +<~runxiyu> idk +<~runxiyu> hm +<~runxiyu> firstly im really heavy on reads +<~runxiyu> and random keys with no sequential access +<~runxiyu> ok LSM makes no sense +<&hax[xor]> > O(2^N) +<~runxiyu> ori: thoughts on how to make tries reasonable to use on disks? +<&hax[xor]> that sounds like something is already very broken +<~runxiyu> hax[xor]: wdym +<&hax[xor]> directory size should absolutely not scale like that +<~runxiyu> hax[xor]: maybe read up on how extendible hsahing works again? +<&hax[xor]> probably but if that's how it scales it still sounds verybroken +<~runxiyu> n is not the amount of objects +<~runxiyu> it's a pathlogic condition caused by chosne-prefix keys +<~runxiyu> (your keys are usually supposed to be hashed into something the attacker can't predict) +<&hax[xor]> if you mean the directory size scales linearly with the number of objects the attacker puts in it... that sounds perfectly normal? +<&ori> runxiyu: same as extendible hashing, just after you extend to, say, 8 bits, you stop splitting the page directory, and have subdirectories +<~runxiyu> ori: that could make senes +<~runxiyu> haven't thought it through +<~runxiyu> directory size is 2^d, d being the global depth +<~runxiyu> urgh i need to review for exams +<~runxiyu> okay +<~runxiyu> write amplification issue +<~runxiyu> im not sure how significant this is for realistic git workloads +<~runxiyu> i haven't counted, but there should be many, many, many more reads than writes +<~runxiyu> if write amplification is really an issue +<&ori> I may go wander around a bit. +<~runxiyu> then ill just port gefs +<~runxiyu> ori: do you mean IRL, or over dynamic pack data structures- +<&ori> irl. 
+<~runxiyu> alright that makes more sense :P +<&ori> tomorrow I think I check out Jiufen +<~runxiyu> frick i want to be able to type epsilon with compose +<&ori> is that not possible? +<~runxiyu> i don't seem to be able to +<~runxiyu> but idk the compose tables on my system +<~runxiyu> ε +<~runxiyu> well +<~runxiyu> unicode hex input always works :/ +<~runxiyu> OKAY FUCK +<~runxiyu> I keep getting distracted by interesting things +<~runxiyu> I need to review for my fucking exams +-- Mode #chat [-q runxiyu] by runxiyu +-- Mode #chat [-a runxiyu] by runxiyu +-- #chat: You must be a channel halfop or higher to set channel mode b (ban). +-- Mode #chat [+b mute:account:runxiyu] by runxiyu +-- #chat: You cannot send messages to this channel whilst a m: (mute) extban is set matching you. +-- #chat: You cannot send messages to this channel whilst a m: (mute) extban is set matching you. +<&f_> does that even work? +<&ori> for 9front, <alt>*e gives ε +<&ori> but, don't remember the compose map +<&ori> thought that there was a similar thing for all greek letters + + +See also: +https://github.com/inkandswitch/darn +https://www.youtube.com/watch?v=nk4nefmguZk +https://crates.io/crates/iroh-blobs +https://crates.io/crates/bao-tree + + +Actually, who cares about abbreviated hashes? +Clients. Clients only. +It will be a separate interface satisfied by "normal repos" +but not satisfied by our store. Good enough. diff --git a/research/packfile_bloom.txt b/research/packfile_bloom.txt new file mode 100644 index 00000000..63acafbe --- /dev/null +++ b/research/packfile_bloom.txt @@ -0,0 +1,133 @@ +Packfile bloom filter RFC +========================= + +Problem +------- + +Especially for server-side usages, repacking is extremely expensive, and +creating multi-pack-indexes is still rather expensive. 
Incremental MIDX +partially solves this, but would defeat the purpose of MIDX when there are too +many of them, as Git would still have to walk the MIDXes in order while +performing expensive indexing queries. + +Idea +---- + +Each MIDX layer, and each non-MIDX index, comes with a bloom filter. MIDXes and +ordinary .idx files are still traversed in their usual order, but the first +step when traversing them, is to check whether that index could possibly have +the desired object, through a bloom filter. + +We will want the filters to be mmaped, and we want the lookup cost to be +dominated by one cache-line read rather than using many scattered reads. +Therefore, a blocked bloom filter is likely the right direction here. The steps +are as follows: + +1. Split the filter into 64-octet buckets, since 64 octets is the most common + cache-line size. +2. Use some bits of the object ID to choose the bucket. +3. Use the rest of the key to choose several bit positions inside that bucket. +4. A lookup thus reads one 64-octet bucket and checks whether all required bits + are set. + +Definitions +----------- + +Let: + + B := number of buckets + K := number of bits set and tested per object ID + +* All integers here are big endian. +* The OID is to be interpreted as a big-endian bitstring, where bit offset 0 + is the most significant bit of octet 0. +* log2(B) + 9K <= hash length in bits. + +File layout +----------- + +* 4-octet signature: {'I', 'D', 'B', 'L'} +* 4-octet version identifier (= 1) +* 4-octet object hash algorithm identifier (= 1 for SHA-1, 2 for SHA-256) +* 4-octet B (number of buckets) +* 2-octet K (number of bits set and tested per object ID) +* 46-octet padding (must be all zeros) +* B buckets of 64 octets each. 
+ +Validation +---------- + +* Matching signature +* Supported version (the rest of the rules are for this version) +* Hash function identifier must be recognized +* B must be nonzero and a power of two +* K must be nonzero +* log2(B) + 9K <= hash length in bits +* Padding must be all zero +* File size must be 64 + 64 * B octets + +Lookup procedure +---------------- + +1. Let b be the unsigned integer encoded by the most significant log2(B) bits + of OID. B is a power of two, and 0 <= b < B. +2. Select and read bucket b. +3. For each 0 <= i < K: + 1. Starting immediately after the most significant log2(B) bits of OID, let the + i-th 9-bit field be the bits at offset 9 * i through 9 * i + 8 within the + next 9 * K bits of the OID. + 2. Let pi be the unsigned integer encoded by that 9-bit field. + Then, 0 <= pi < 512. + 3. Compute wi := pi >> 6, and bi := pi & 63. + Thus, wi identifies one of the 8 64-bit words in bucket b, and bi + identifies one bit within that word. + 4. Test whether bit bi is set in the word wi of bucket b. (Within each 64-bit + word, bit index 0 denotes the most significant bit, and bit index 63 + denotes the least significant bit.) + +If any test fails, the OID is definitely not in the relevant idx. +If all tests succeed, the OID may be in the relevant idx. + +Note that two of the K 9-bit fields can decode to the same pi, which means an +insertion may set fewer than K distinct bits. + +Worked example +-------------- + +Let: + + B = 1 << 15 = 32768 + K = 8 + +Then, log2(B) = 15. Each lookup thus uses 15 bits to choose the bucket +and 8 * 9 = 72 bits to choose the in-bucket positions, for a total of +87 bits taken from the object ID. + +1. Read the first 15 bits of OID and interpret them as b, where + 0 <= b < 32768. +2. Read bucket b. +3. For each 0 <= i < 8: + 1. Read the i-th 9-bit field from the next 72 bits of OID and interpret it + as pi, where 0 <= pi < 512. + 2. Compute: wi = pi >> 6, bi = pi & 63. + 3. 
+ Test whether bit bi is set in the word wi of bucket b. + +Security considerations +------------------------ + +An adversarial packfile whose objects are constructed (at significant +computational cost, even for SHA-1, as vulnerable as it is) to share the same +prefix under the relevant object format hash algorithm could be used to fill up +the bloom filters, rendering some buckets useless. In the worst case, if an +attacker somehow fills all filters, this proposal's optimizations become +useless, but this would not be a significant DoS vector. + +TODOs +----- + +* Consider dropping mmap (page read vs cacheline read) +* How should B and K be chosen? +* How does creation/insert work? Note that packfiles and `.idx`es are immutable. +* What are the sizes? +* What are the false positive rates? +* How are benchmarks? |
