diff options
| author | 2026-06-14 13:39:35 +0000 | |
|---|---|---|
| committer | 2026-06-14 13:39:35 +0000 | |
| commit | df58d018e712c4e0e000a5ce07f47762548fb22c (patch) | |
| tree | 264dad03ebd0b43608126468b35ee62074652a3b | |
| parent | cmd/explain-pack: Add (diff) | |
cmd/idx-bloom: Write a pack bloom
| -rw-r--r-- | cmd/idx-bloom/doc.go | 8 |
| -rw-r--r-- | cmd/idx-bloom/main.go | 99 |
2 files changed, 107 insertions, 0 deletions
diff --git a/cmd/idx-bloom/doc.go b/cmd/idx-bloom/doc.go new file mode 100644 index 00000000..e7d4e818 --- /dev/null +++ b/cmd/idx-bloom/doc.go @@ -0,0 +1,8 @@ +// Command idx-bloom reads a Git pack index +// and writes an IDBL Bloom filter over its object IDs to stdout. +// +// With an index filename argument the index is read from that file; +// with no argument it is read from stdin. +// A pack index does not record its object format, +// so the format must be given with -format. +package main diff --git a/cmd/idx-bloom/main.go b/cmd/idx-bloom/main.go new file mode 100644 index 00000000..62fb347b --- /dev/null +++ b/cmd/idx-bloom/main.go @@ -0,0 +1,99 @@ +package main + +import ( + "flag" + "fmt" + "io" + "os" + + "lindenii.org/go/furgit/internal/format/packidx" + "lindenii.org/go/furgit/internal/format/packidx/bloom" + "lindenii.org/go/furgit/object/id" +) + +func main() { + format := flag.String("format", "", "object format of the index: sha1 or sha256 (required)") + + flag.Parse() + + err := run(*format, flag.Args(), os.Stdin, os.Stdout) + if err != nil { + fmt.Fprintln(os.Stderr, "idx-bloom:", err) + os.Exit(1) + } +} + +func run(format string, args []string, stdin io.Reader, stdout io.Writer) error { + if format == "" { + return fmt.Errorf("the -format flag is required (sha1 or sha256)") + } + + objectFormat, err := id.ParseObjectFormat(format) + if err != nil { + return fmt.Errorf("invalid -format %q: %w", format, err) + } + + if len(args) > 1 { + return fmt.Errorf("at most one index file argument is accepted, got %d", len(args)) + } + + data, err := readInput(args, stdin) + if err != nil { + return err + } + + index, err := packidx.Parse(data, objectFormat.Size()) + if err != nil { + return fmt.Errorf("parsing index: %w", err) + } + + filter, err := buildFilter(objectFormat, &index) + if err != nil { + return err + } + + _, err = stdout.Write(filter) + if err != nil { + return fmt.Errorf("writing filter: %w", err) + } + + return nil +} + +func 
readInput(args []string, stdin io.Reader) ([]byte, error) { + if len(args) == 0 { + data, err := io.ReadAll(stdin) + if err != nil { + return nil, fmt.Errorf("reading index from stdin: %w", err) + } + + return data, nil + } + + data, err := os.ReadFile(args[0]) //#nosec G304 + if err != nil { + return nil, fmt.Errorf("reading index %q: %w", args[0], err) + } + + return data, nil +} + +func buildFilter(objectFormat id.ObjectFormat, index *packidx.Packidx) ([]byte, error) { + objects := index.NumObjects() + + bucketCount, k, err := bloom.RecommendParams(objectFormat, objects) + if err != nil { + return nil, fmt.Errorf("choosing parameters: %w", err) + } + + builder, err := bloom.NewBuilder(objectFormat, bucketCount, k) + if err != nil { + return nil, fmt.Errorf("creating builder: %w", err) + } + + for pos := range objects { + builder.Add(index.OIDAt(pos)) + } + + return builder.Bytes(), nil +} |
