aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Runxi Yu2026-06-14 13:39:35 +0000
committerGravatar Runxi Yu2026-06-14 13:39:35 +0000
commitdf58d018e712c4e0e000a5ce07f47762548fb22c (patch)
tree264dad03ebd0b43608126468b35ee62074652a3b
parentcmd/explain-pack: Add (diff)
cmd/idx-bloom: Write a pack bloom
-rw-r--r--cmd/idx-bloom/doc.go8
-rw-r--r--cmd/idx-bloom/main.go99
2 files changed, 107 insertions, 0 deletions
diff --git a/cmd/idx-bloom/doc.go b/cmd/idx-bloom/doc.go
new file mode 100644
index 00000000..e7d4e818
--- /dev/null
+++ b/cmd/idx-bloom/doc.go
@@ -0,0 +1,8 @@
+// Command idx-bloom reads a Git pack index
+// and writes an IDBL Bloom filter over its object IDs to stdout.
+//
+// With an index filename argument the index is read from that file;
+// with no argument it is read from stdin.
+// A pack index does not record its object format,
+// so the format must be given with -format.
+package main
diff --git a/cmd/idx-bloom/main.go b/cmd/idx-bloom/main.go
new file mode 100644
index 00000000..62fb347b
--- /dev/null
+++ b/cmd/idx-bloom/main.go
@@ -0,0 +1,99 @@
+package main
+
+import (
+ "flag"
+ "fmt"
+ "io"
+ "os"
+
+ "lindenii.org/go/furgit/internal/format/packidx"
+ "lindenii.org/go/furgit/internal/format/packidx/bloom"
+ "lindenii.org/go/furgit/object/id"
+)
+
+func main() {
+ format := flag.String("format", "", "object format of the index: sha1 or sha256 (required)")
+
+ flag.Parse()
+
+ err := run(*format, flag.Args(), os.Stdin, os.Stdout)
+ if err != nil {
+ fmt.Fprintln(os.Stderr, "idx-bloom:", err)
+ os.Exit(1)
+ }
+}
+
+// run performs the whole command: validate, read, parse, build, write.
+//
+// format is the value of the -format flag and must be non-empty and
+// accepted by id.ParseObjectFormat. args holds the positional arguments;
+// at most one is allowed. The index bytes come from readInput (the named
+// file, or stdin when args is empty), are parsed with packidx.Parse, and
+// the filter bytes produced by buildFilter are written to stdout.
+//
+// The flag and argument checks happen before any input is read, so a
+// usage mistake is reported without consuming stdin.
+func run(format string, args []string, stdin io.Reader, stdout io.Writer) error {
+	if format == "" {
+		return fmt.Errorf("the -format flag is required (sha1 or sha256)")
+	}
+
+	objectFormat, err := id.ParseObjectFormat(format)
+	if err != nil {
+		return fmt.Errorf("invalid -format %q: %w", format, err)
+	}
+
+	if n := len(args); n > 1 {
+		return fmt.Errorf("at most one index file argument is accepted, got %d", n)
+	}
+
+	raw, err := readInput(args, stdin)
+	if err != nil {
+		return err
+	}
+
+	parsed, err := packidx.Parse(raw, objectFormat.Size())
+	if err != nil {
+		return fmt.Errorf("parsing index: %w", err)
+	}
+
+	encoded, err := buildFilter(objectFormat, &parsed)
+	if err != nil {
+		return err
+	}
+
+	if _, err := stdout.Write(encoded); err != nil {
+		return fmt.Errorf("writing filter: %w", err)
+	}
+
+	return nil
+}
+
+// readInput returns the complete contents of the index to process.
+//
+// When args is non-empty, args[0] is treated as a filename and the whole
+// file is read. Otherwise stdin is read until EOF. On failure the
+// returned slice is nil and the error wraps the underlying cause,
+// naming the source that could not be read.
+func readInput(args []string, stdin io.Reader) ([]byte, error) {
+	if len(args) > 0 {
+		path := args[0]
+
+		contents, err := os.ReadFile(path) //#nosec G304
+		if err != nil {
+			return nil, fmt.Errorf("reading index %q: %w", path, err)
+		}
+
+		return contents, nil
+	}
+
+	contents, err := io.ReadAll(stdin)
+	if err != nil {
+		return nil, fmt.Errorf("reading index from stdin: %w", err)
+	}
+
+	return contents, nil
+}
+
+// buildFilter produces the serialized filter for every object ID in index.
+//
+// The filter parameters are obtained from bloom.RecommendParams using the
+// object format and the index's object count, and are passed unchanged to
+// bloom.NewBuilder. Each object ID, fetched by position with index.OIDAt,
+// is then added to the builder, and the builder's Bytes output is
+// returned. An error from either bloom call is wrapped and returned with
+// a nil slice.
+func buildFilter(objectFormat id.ObjectFormat, index *packidx.Packidx) ([]byte, error) {
+	total := index.NumObjects()
+
+	buckets, k, err := bloom.RecommendParams(objectFormat, total)
+	if err != nil {
+		return nil, fmt.Errorf("choosing parameters: %w", err)
+	}
+
+	b, err := bloom.NewBuilder(objectFormat, buckets, k)
+	if err != nil {
+		return nil, fmt.Errorf("creating builder: %w", err)
+	}
+
+	// Visit positions 0..total-1 so every entry of the index is inserted.
+	for i := range total {
+		b.Add(index.OIDAt(i))
+	}
+
+	return b.Bytes(), nil
+}