diff options
| author | 2026-01-28 21:55:53 +0100 | |
|---|---|---|
| committer | 2026-01-28 22:04:12 +0100 | |
| commit | e6919174cfb82e283ba7201a06067650dab3cb15 (patch) | |
| tree | 70f93f24022eaecb1ed7b2e6659a7cc1780be340 | |
| parent | pack: Add _read to filenames (diff) | |
| signature | No signature | |
pack: basic packfile writing
| -rw-r--r-- | pack_pack_write.go | 273 | ||||
| -rw-r--r-- | pack_write_test.go | 226 |
2 files changed, 499 insertions, 0 deletions
diff --git a/pack_pack_write.go b/pack_pack_write.go new file mode 100644 index 00000000..435c9edb --- /dev/null +++ b/pack_pack_write.go @@ -0,0 +1,273 @@ +package furgit + +import ( + "crypto/sha1" + "crypto/sha256" + "encoding/binary" + "errors" + "hash" + "io" + + "codeberg.org/lindenii/furgit/internal/zlib" +) + +// TODO +var errPackDeltaUnimplemented = errors.New("furgit: pack: delta writing not implemented") + +// packWriter writes a PACKv2 stream. +type packWriter struct { + w io.Writer + h hash.Hash + algo hashAlgorithm + objCount uint32 + wroteHeader bool + bytesWritten uint64 +} + +func newPackWriter(w io.Writer, algo hashAlgorithm, objCount uint32) (*packWriter, error) { + if w == nil { + return nil, ErrInvalidObject + } + h, err := newHashWriter(algo) + if err != nil { + return nil, err + } + return &packWriter{ + w: w, + h: h, + algo: algo, + objCount: objCount, + }, nil +} + +func newHashWriter(algo hashAlgorithm) (hash.Hash, error) { + switch algo { + case hashAlgoSHA1: + return sha1.New(), nil + case hashAlgoSHA256: + return sha256.New(), nil + default: + return nil, ErrInvalidObject + } +} + +func (pw *packWriter) writePacked(p []byte) error { + if len(p) == 0 { + return nil + } + n, err := pw.w.Write(p) + if n > 0 { + _, _ = pw.h.Write(p[:n]) + pw.bytesWritten += uint64(n) + } + if err != nil { + return err + } + if n != len(p) { + return io.ErrShortWrite + } + return nil +} + +func (pw *packWriter) WriteHeader() error { + if pw == nil || pw.wroteHeader { + return ErrInvalidObject + } + var hdr [12]byte + binary.BigEndian.PutUint32(hdr[0:4], packMagic) + binary.BigEndian.PutUint32(hdr[4:8], packVersion2) + binary.BigEndian.PutUint32(hdr[8:12], pw.objCount) + if err := pw.writePacked(hdr[:]); err != nil { + return err + } + pw.wroteHeader = true + return nil +} + +func (pw *packWriter) WriteObject(ty ObjectType, body []byte) error { + if pw == nil || !pw.wroteHeader { + return ErrInvalidObject + } + switch ty { + case ObjectTypeCommit, ObjectTypeTree, ObjectTypeBlob, ObjectTypeTag: + // remember that go switches don't fallthrough lol + default: + return ErrInvalidObject + } + if body == nil { + body = []byte{} + } + + hdr, err := packHeaderEncode(ty, len(body)) + if err != nil { + return err + } + if err := pw.writePacked(hdr); err != nil { + return err + } + + zw := zlib.NewWriter(&packHashWriter{pw: pw}) + if _, err := zw.Write(body); err != nil { + _ = zw.Close() + return err + } + return zw.Close() +} + +func (pw *packWriter) WriteOfsDelta(baseOffset uint64, baseSize, resultSize int, delta []byte) error { + _ = baseOffset + _ = baseSize + _ = resultSize + _ = delta + return errPackDeltaUnimplemented +} + +func (pw *packWriter) WriteRefDelta(base Hash, baseSize, resultSize int, delta []byte) error { + _ = base + _ = baseSize + _ = resultSize + _ = delta + return errPackDeltaUnimplemented +} + +func (pw *packWriter) Close() (Hash, error) { + if pw == nil || !pw.wroteHeader { + return Hash{}, ErrInvalidObject + } + sum := pw.h.Sum(nil) + if _, err := pw.w.Write(sum); err != nil { + return Hash{}, err + } + var out Hash + copy(out.data[:], sum) + out.algo = pw.algo + return out, nil +} + +type packHashWriter struct { + pw *packWriter +} + +func (w *packHashWriter) Write(p []byte) (int, error) { + if w == nil || w.pw == nil { + return 0, ErrInvalidObject + } + if err := w.pw.writePacked(p); err != nil { + return 0, err + } + return len(p), nil +} + +// packHeaderEncode encodes a pack object header (type + size). +func packHeaderEncode(ty ObjectType, size int) ([]byte, error) { + if size < 0 { + return nil, ErrInvalidObject + } + var out [16]byte + pos := 0 + + b := byte(size & 0x0f) + size >>= 4 + b |= byte(ty&0x07) << 4 + if size > 0 { + b |= 0x80 + } + out[pos] = b + pos++ + + for size > 0 { + b = byte(size & 0x7f) + size >>= 7 + if size > 0 { + b |= 0x80 + } + out[pos] = b + pos++ + } + + return out[:pos], nil +} + +// packVarintEncode encodes a 7-bit varint. +func packVarintEncode(size int) ([]byte, error) { + if size < 0 { + return nil, ErrInvalidObject + } + var out [16]byte + pos := 0 + for { + b := byte(size & 0x7f) + size >>= 7 + if size != 0 { + b |= 0x80 + } + out[pos] = b + pos++ + if size == 0 { + break + } + } + return out[:pos], nil +} + +// packOfsEncode encodes an ofs-delta distance. +func packOfsEncode(dist uint64) ([]byte, error) { + if dist == 0 { + return nil, ErrInvalidObject + } + var out [16]byte + pos := 0 + out[pos] = byte(dist & 0x7f) + pos++ + dist >>= 7 + for dist != 0 { + b := byte((dist - 1) & 0x7f) + out[pos] = b | 0x80 + pos++ + dist >>= 7 + } + for i, j := 0, pos-1; i < j; i, j = i+1, j-1 { + out[i], out[j] = out[j], out[i] + } + return out[:pos], nil +} + +// packWrite writes a pack stream for the provided object ids. +func (repo *Repository) packWrite(w io.Writer, objects []Hash, opts packWriteOptions) (Hash, error) { + if repo == nil { + return Hash{}, ErrInvalidObject + } + if opts.EnableDeltas || opts.EnableThinPack { + return Hash{}, errPackDeltaUnimplemented + } + if len(objects) > int(^uint32(0)) { + return Hash{}, ErrInvalidObject + } + + pw, err := newPackWriter(w, repo.hashAlgo, uint32(len(objects))) + if err != nil { + return Hash{}, err + } + if err := pw.WriteHeader(); err != nil { + return Hash{}, err + } + + for _, id := range objects { + ty, body, err := repo.ReadObjectTypeRaw(id) + if err != nil { + return Hash{}, err + } + if err := pw.WriteObject(ty, body); err != nil { + return Hash{}, err + } + } + + return pw.Close() +} + +type packWriteOptions struct { + EnableDeltas bool + EnableThinPack bool + MinDeltaSavings int + MaxDeltaDepth int +} diff --git a/pack_write_test.go b/pack_write_test.go new file mode 100644 index 00000000..d33ae012 --- /dev/null +++ b/pack_write_test.go @@ -0,0 +1,226 @@ +package furgit + +import ( + "bytes" + "errors" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestPackHeaderEncodeParseRoundtrip(t *testing.T) { + cases := []struct { + ty ObjectType + sizes []int + }{ + {ObjectTypeCommit, []int{0, 1, 15, 16, 127, 128, 1024, 1 << 20}}, + {ObjectTypeTree, []int{0, 3, 31, 32, 255, 256, 4096}}, + {ObjectTypeBlob, []int{0, 7, 63, 64, 511, 512, 99999}}, + {ObjectTypeTag, []int{0, 2, 14, 15, 16, 127, 128}}, + } + + for _, c := range cases { + for _, size := range c.sizes { + encoded, err := packHeaderEncode(c.ty, size) + if err != nil { + t.Fatalf("packHeaderEncode(%v,%d) error: %v", c.ty, size, err) + } + gotTy, gotSize, consumed, err := packHeaderParse(encoded) + if err != nil { + t.Fatalf("packHeaderParse error: %v", err) + } + if gotTy != c.ty || gotSize != size { + t.Fatalf("roundtrip mismatch: got (%v,%d), want (%v,%d)", gotTy, gotSize, c.ty, size) + } + if consumed != len(encoded) { + t.Fatalf("consumed=%d, encoded=%d", consumed, len(encoded)) + } + } + } +} + +func TestPackVarintEncodeRoundtrip(t *testing.T) { + values := []int{0, 1, 2, 7, 8, 127, 128, 129, 255, 1024, 1 << 20} + for _, v := range values { + encoded, err := packVarintEncode(v) + if err != nil { + t.Fatalf("packVarintEncode(%d) error: %v", v, err) + } + pos := 0 + got, err := packVarintRead(encoded, &pos) + if err != nil { + t.Fatalf("packVarintRead error: %v", err) + } + if got != v { + t.Fatalf("roundtrip mismatch: got %d, want %d", got, v) + } + if pos != len(encoded) { + t.Fatalf("pos=%d, encoded=%d", pos, len(encoded)) + } + } +} + +func TestPackOfsEncodeRoundtrip(t *testing.T) { + values := []uint64{1, 2, 7, 8, 9, 0x7f, 0x80, 0x81, 0x1000, 0x12345} + for _, v := range values { + encoded, err := packOfsEncode(v) + if err != nil { + t.Fatalf("packOfsEncode(%d) error: %v", v, err) + } + dist, consumed, err := packDeltaReadOfsDistance(encoded) + if err != nil { + t.Fatalf("packDeltaReadOfsDistance error: %v", err) + } + if dist != v { + t.Fatalf("roundtrip mismatch: got %d, want %d", dist, v) + } + if consumed != len(encoded) { + t.Fatalf("consumed=%d, encoded=%d", consumed, len(encoded)) + } + } +} + +func TestPackWriteNoDeltas(t *testing.T) { + repoPath, cleanup := setupTestRepo(t) + defer cleanup() + + workDir, cleanupWork := setupWorkDir(t) + defer cleanupWork() + + if err := os.WriteFile(filepath.Join(workDir, "file1.txt"), []byte("content1"), 0o644); err != nil { + t.Fatalf("failed to write file1.txt: %v", err) + } + if err := os.WriteFile(filepath.Join(workDir, "file2.txt"), []byte("content2"), 0o644); err != nil { + t.Fatalf("failed to write file2.txt: %v", err) + } + + gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") + gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "Test commit") + commitHash := gitCmd(t, repoPath, "rev-parse", "HEAD") + + commitBody := gitCatFile(t, repoPath, "commit", commitHash) + lines := bytes.Split(commitBody, []byte{'\n'}) + if len(lines) == 0 || !bytes.HasPrefix(lines[0], []byte("tree ")) { + t.Fatalf("commit missing tree header") + } + treeHash := strings.TrimSpace(string(bytes.TrimPrefix(lines[0], []byte("tree ")))) + + lsTree := gitCmd(t, repoPath, "ls-tree", "-r", treeHash) + var blobHashes []string + for _, line := range strings.Split(lsTree, "\n") { + if line == "" { + continue + } + fields := strings.Fields(line) + if len(fields) < 3 { + t.Fatalf("unexpected ls-tree line: %q", line) + } + blobHashes = append(blobHashes, fields[2]) + } + + repo, err := OpenRepository(repoPath) + if err != nil { + t.Fatalf("OpenRepository failed: %v", err) + } + defer func() { _ = repo.Close() }() + + var objects []Hash + commitID, _ := repo.ParseHash(commitHash) + objects = append(objects, commitID) + treeID, _ := repo.ParseHash(treeHash) + objects = append(objects, treeID) + for _, bh := range blobHashes { + id, _ := repo.ParseHash(bh) + objects = append(objects, id) + } + expectedOids := append([]string{commitHash, treeHash}, blobHashes...) + + packDir := filepath.Join(repoPath, "objects", "pack") + if err := os.MkdirAll(packDir, 0o755); err != nil { + t.Fatalf("failed to create pack dir: %v", err) + } + pf, err := os.CreateTemp(packDir, "furgit-test-*.pack") + if err != nil { + t.Fatalf("failed to create pack file: %v", err) + } + packPath := pf.Name() + idxPath := strings.TrimSuffix(packPath, ".pack") + ".idx" + if _, err := repo.packWrite(pf, objects, packWriteOptions{}); err != nil { + _ = pf.Close() + t.Fatalf("packWrite failed: %v", err) + } + if err := pf.Close(); err != nil { + t.Fatalf("failed to close pack file: %v", err) + } + + defer func() { + _ = os.Remove(packPath) + _ = os.Remove(idxPath) + }() + + _ = gitCmd(t, repoPath, "index-pack", "-o", idxPath, packPath) + + verifyOut := gitCmd(t, repoPath, "verify-pack", "-v", idxPath) + seen := make(map[string]struct{}) + for _, line := range strings.Split(verifyOut, "\n") { + if strings.TrimSpace(line) == "" { + continue + } + if strings.HasPrefix(line, "chain length") || strings.HasPrefix(line, "non delta") { + continue + } + parts := strings.Fields(line) + if len(parts) == 0 { + continue + } + seen[parts[0]] = struct{}{} + } + for _, oid := range expectedOids { + if _, ok := seen[oid]; !ok { + t.Fatalf("verify-pack missing object %s", oid) + } + } + + for _, oid := range expectedOids { + if err := removeLooseObject(repoPath, oid); err != nil { + t.Fatalf("remove loose object %s: %v", oid, err) + } + } + for _, oid := range expectedOids { + _ = gitCmd(t, repoPath, "cat-file", "-p", oid) + } + + _ = gitCmd(t, repoPath, "fsck", "--full", "--strict") +} + +func TestPackWriteDeltasUnimplemented(t *testing.T) { + repoPath, cleanup := setupTestRepo(t) + defer cleanup() + + repo, err := OpenRepository(repoPath) + if err != nil { + t.Fatalf("OpenRepository failed: %v", err) + } + defer func() { _ = repo.Close() }() + + buf := new(bytes.Buffer) + _, err = repo.packWrite(buf, nil, packWriteOptions{EnableDeltas: true}) + if !errors.Is(err, errPackDeltaUnimplemented) { + t.Fatalf("expected errPackDeltaUnimplemented, got %v", err) + } +} + +func removeLooseObject(repoPath, oid string) error { + if len(oid) < 2 { + return ErrInvalidObject + } + path := filepath.Join(repoPath, "objects", oid[:2], oid[2:]) + if err := os.Remove(path); err != nil { + if os.IsNotExist(err) { + return nil + } + return err + } + return nil +} |
