heroagent/pkg/data/dedupestor/dedupestor_test.go
2025-04-23 04:18:28 +02:00

533 lines
14 KiB
Go

package dedupestor
import (
"bytes"
"os"
"path/filepath"
"testing"
)
// setupTest recreates the fixed scratch directories used by the store tests in
// this file so that each test starts from an empty directory. Any failure to
// remove or create a directory aborts the calling test through t.Fatalf.
func setupTest(t *testing.T) {
	// Mark this function as a helper so a failure is attributed to the line in
	// the calling test rather than to a line inside setupTest.
	t.Helper()

	testDirs := []string{
		"/tmp/dedupestor_test",
		"/tmp/dedupestor_test_size",
		"/tmp/dedupestor_test_exists",
		"/tmp/dedupestor_test_multiple",
		"/tmp/dedupestor_test_refs",
	}
	for _, dir := range testDirs {
		// os.RemoveAll returns nil when the path does not exist, so no
		// preceding os.Stat existence check is needed.
		if err := os.RemoveAll(dir); err != nil {
			t.Fatalf("Failed to remove test directory %s: %v", dir, err)
		}
		if err := os.MkdirAll(dir, 0755); err != nil {
			t.Fatalf("Failed to create test directory %s: %v", dir, err)
		}
	}
}
// TestBasicOperations exercises the store/get round trip, checks that storing
// identical bytes under a second reference yields the same ID, and checks that
// different bytes are assigned a different ID and can be read back.
func TestBasicOperations(t *testing.T) {
	setupTest(t)

	store, err := New(NewArgs{
		Path:  "/tmp/dedupestor_test",
		Reset: true,
	})
	if err != nil {
		t.Fatalf("Failed to create dedupe store: %v", err)
	}
	defer store.Close()

	// Round trip: the bytes read back must equal the bytes stored.
	firstPayload := []byte("test data 1")
	firstID, err := store.Store(firstPayload, Reference{Owner: 1, ID: 1})
	if err != nil {
		t.Fatalf("Failed to store data: %v", err)
	}
	if got, err := store.Get(firstID); err != nil {
		t.Fatalf("Failed to retrieve data: %v", err)
	} else if !bytes.Equal(got, firstPayload) {
		t.Fatalf("Retrieved data doesn't match stored data")
	}

	// Same bytes, different reference: the ID must not change.
	dupID, err := store.Store(firstPayload, Reference{Owner: 1, ID: 2})
	if err != nil {
		t.Fatalf("Failed to store data with second reference: %v", err)
	}
	if firstID != dupID {
		t.Fatalf("Expected same ID for duplicate data, got %d and %d", firstID, dupID)
	}

	// Different bytes: a distinct ID is expected.
	secondPayload := []byte("test data 2")
	secondID, err := store.Store(secondPayload, Reference{Owner: 1, ID: 3})
	if err != nil {
		t.Fatalf("Failed to store different data: %v", err)
	}
	if firstID == secondID {
		t.Fatalf("Expected different IDs for different data, got %d for both", firstID)
	}
	if got, err := store.Get(secondID); err != nil {
		t.Fatalf("Failed to retrieve second data: %v", err)
	} else if !bytes.Equal(got, secondPayload) {
		t.Fatalf("Retrieved data doesn't match second stored data")
	}
}
// TestSizeLimit stores a 1024-byte value, which must round-trip unchanged, and
// then a 2*1024*1024-byte value, for which Store is expected to return an
// error.
func TestSizeLimit(t *testing.T) {
	setupTest(t)

	store, err := New(NewArgs{
		Path:  "/tmp/dedupestor_test_size",
		Reset: true,
	})
	if err != nil {
		t.Fatalf("Failed to create dedupe store: %v", err)
	}
	defer store.Close()

	// Both Store calls below use the same reference.
	owner := Reference{Owner: 1, ID: 1}

	// Small payload: a repeating 0..255 byte pattern, expected to be accepted.
	small := make([]byte, 1024)
	for idx := range small {
		small[idx] = byte(idx % 256)
	}
	smallID, err := store.Store(small, owner)
	if err != nil {
		t.Fatalf("Failed to store small data: %v", err)
	}
	if got, err := store.Get(smallID); err != nil {
		t.Fatalf("Failed to retrieve small data: %v", err)
	} else if !bytes.Equal(got, small) {
		t.Fatalf("Retrieved data doesn't match stored small data")
	}

	// Large payload: same pattern, expected to be rejected by Store.
	large := make([]byte, 2*1024*1024)
	for idx := range large {
		large[idx] = byte(idx % 256)
	}
	if _, err := store.Store(large, owner); err == nil {
		t.Fatalf("Expected error for data exceeding size limit")
	}
}
// TestExists checks IDExists and HashExists: both must report true for a value
// that was just stored, and false for ID 99 and for the literal hash
// "nonexistenthash" respectively.
func TestExists(t *testing.T) {
	setupTest(t)

	store, err := New(NewArgs{
		Path:  "/tmp/dedupestor_test_exists",
		Reset: true,
	})
	if err != nil {
		t.Fatalf("Failed to create dedupe store: %v", err)
	}
	defer store.Close()

	storedID, err := store.Store([]byte("test data"), Reference{Owner: 1, ID: 1})
	if err != nil {
		t.Fatalf("Failed to store data: %v", err)
	}

	// Lookup by ID.
	if !store.IDExists(storedID) {
		t.Fatalf("IDExists returned false for existing ID")
	}
	if store.IDExists(99) {
		t.Fatalf("IDExists returned true for non-existent ID")
	}

	// Lookup by hash: the hash is computed from the bytes read back from the
	// store rather than from the original input.
	stored, err := store.Get(storedID)
	if err != nil {
		t.Fatalf("Failed to get data: %v", err)
	}
	if digest := sha256Sum(stored); !store.HashExists(digest) {
		t.Fatalf("HashExists returned false for existing hash")
	}
	if store.HashExists("nonexistenthash") {
		t.Fatalf("HashExists returned true for non-existent hash")
	}
}
// TestMultipleOperations stores five distinct values under owner 1, reads each
// one back, and then stores the same five values under owner 2, expecting
// every repeat store to return the ID assigned the first time.
func TestMultipleOperations(t *testing.T) {
	setupTest(t)

	store, err := New(NewArgs{
		Path:  "/tmp/dedupestor_test_multiple",
		Reset: true,
	})
	if err != nil {
		t.Fatalf("Failed to create dedupe store: %v", err)
	}
	defer store.Close()

	const count = 5
	payloads := make([][]byte, 0, count)
	assigned := make([]uint32, 0, count)

	// Phase 1: store "test data 0" .. "test data 4" and remember the IDs.
	for n := 0; n < count; n++ {
		payload := []byte("test data " + string(rune('0'+n)))
		payloads = append(payloads, payload)

		id, err := store.Store(payload, Reference{Owner: 1, ID: uint32(n)})
		if err != nil {
			t.Fatalf("Failed to store data %d: %v", n, err)
		}
		assigned = append(assigned, id)
	}

	// Phase 2: every stored value must be readable through its ID.
	for n, id := range assigned {
		got, err := store.Get(id)
		if err != nil {
			t.Fatalf("Failed to retrieve data %d: %v", n, err)
		}
		if !bytes.Equal(got, payloads[n]) {
			t.Fatalf("Retrieved data %d doesn't match stored data", n)
		}
	}

	// Phase 3: storing the same bytes under a different owner must reuse the ID.
	for n, payload := range payloads {
		again, err := store.Store(payload, Reference{Owner: 2, ID: uint32(n)})
		if err != nil {
			t.Fatalf("Failed to store duplicate data %d: %v", n, err)
		}
		if again != assigned[n] {
			t.Fatalf("Expected same ID for duplicate data %d, got %d and %d", n, assigned[n], again)
		}
	}
}
// TestReferences stores one value under three references and then deletes the
// references one at a time. The value must still exist after the first two
// deletions, and must be gone (IDExists false, Get returning an error) after
// the third.
func TestReferences(t *testing.T) {
	setupTest(t)

	store, err := New(NewArgs{
		Path:  "/tmp/dedupestor_test_refs",
		Reset: true,
	})
	if err != nil {
		t.Fatalf("Failed to create dedupe store: %v", err)
	}
	defer store.Close()

	payload := []byte("test data")
	var (
		refA = Reference{Owner: 1, ID: 1}
		refB = Reference{Owner: 1, ID: 2}
		refC = Reference{Owner: 2, ID: 1}
	)

	// Attach all three references; each store must return the same ID.
	baseID, err := store.Store(payload, refA)
	if err != nil {
		t.Fatalf("Failed to store data with first reference: %v", err)
	}

	idB, err := store.Store(payload, refB)
	if err != nil {
		t.Fatalf("Failed to store data with second reference: %v", err)
	}
	if baseID != idB {
		t.Fatalf("Expected same ID for same data, got %d and %d", baseID, idB)
	}

	idC, err := store.Store(payload, refC)
	if err != nil {
		t.Fatalf("Failed to store data with third reference: %v", err)
	}
	if baseID != idC {
		t.Fatalf("Expected same ID for same data, got %d and %d", baseID, idC)
	}

	// Two references remain after this deletion.
	if err := store.Delete(baseID, refA); err != nil {
		t.Fatalf("Failed to delete first reference: %v", err)
	}
	if !store.IDExists(baseID) {
		t.Fatalf("Data should still exist after deleting first reference")
	}

	// One reference remains after this deletion.
	if err := store.Delete(baseID, refB); err != nil {
		t.Fatalf("Failed to delete second reference: %v", err)
	}
	if !store.IDExists(baseID) {
		t.Fatalf("Data should still exist after deleting second reference")
	}

	// No references remain, so the value itself is expected to disappear.
	if err := store.Delete(baseID, refC); err != nil {
		t.Fatalf("Failed to delete third reference: %v", err)
	}
	if store.IDExists(baseID) {
		t.Fatalf("Data should be deleted after removing all references")
	}

	// Confirm the deletion through the read path as well.
	if _, err := store.Get(baseID); err == nil {
		t.Fatalf("Expected error getting deleted data")
	}
}
// TestMetadataConversion checks that a Reference survives a
// ToBytes/BytesToReference round trip and that a Metadata value holding two
// references survives a ToBytes/BytesToMetadata round trip with its ID and
// every reference intact.
func TestMetadataConversion(t *testing.T) {
	// Reference round trip. The encoded form is kept in refBytes rather than a
	// local named "bytes", which would shadow the imported bytes package.
	ref := Reference{
		Owner: 12345,
		ID:    67890,
	}
	refBytes := ref.ToBytes()
	recoveredRef := BytesToReference(refBytes)
	if ref.Owner != recoveredRef.Owner || ref.ID != recoveredRef.ID {
		t.Fatalf("Reference conversion failed: original %+v, recovered %+v", ref, recoveredRef)
	}

	// Metadata round trip with two references attached.
	metadata := Metadata{
		ID:         42,
		References: []Reference{},
	}
	ref1 := Reference{Owner: 1, ID: 100}
	ref2 := Reference{Owner: 2, ID: 200}

	metadata, err := metadata.AddReference(ref1)
	if err != nil {
		t.Fatalf("Failed to add reference: %v", err)
	}
	metadata, err = metadata.AddReference(ref2)
	if err != nil {
		t.Fatalf("Failed to add reference: %v", err)
	}

	metaBytes := metadata.ToBytes()
	recoveredMeta := BytesToMetadata(metaBytes)
	if metadata.ID != recoveredMeta.ID || len(metadata.References) != len(recoveredMeta.References) {
		t.Fatalf("Metadata conversion failed: original %+v, recovered %+v", metadata, recoveredMeta)
	}

	// The lengths were verified equal above, so indexing recoveredMeta by i is
	// in range. The loop variable is named want to avoid shadowing ref.
	for i, want := range metadata.References {
		got := recoveredMeta.References[i]
		if want.Owner != got.Owner || want.ID != got.ID {
			t.Fatalf("Reference in metadata conversion failed at index %d", i)
		}
	}
}
// TestAddRemoveReference drives Metadata.AddReference and
// Metadata.RemoveReference through a fixed sequence and checks the length of
// References after every step: add, add, add duplicate (no growth), remove,
// remove unknown (no change), remove last.
func TestAddRemoveReference(t *testing.T) {
	meta := Metadata{
		ID:         1,
		References: []Reference{},
	}
	refA := Reference{Owner: 1, ID: 100}
	refB := Reference{Owner: 2, ID: 200}

	// Step 1: the first reference is appended and stored as given.
	meta, err := meta.AddReference(refA)
	if err != nil {
		t.Fatalf("Failed to add first reference: %v", err)
	}
	if n := len(meta.References); n != 1 {
		t.Fatalf("Expected 1 reference after adding first, got %d", n)
	}
	if head := meta.References[0]; head.Owner != refA.Owner || head.ID != refA.ID {
		t.Fatalf("First reference not added correctly")
	}

	// Step 2: a second, different reference grows the list to two.
	meta, err = meta.AddReference(refB)
	if err != nil {
		t.Fatalf("Failed to add second reference: %v", err)
	}
	if n := len(meta.References); n != 2 {
		t.Fatalf("Expected 2 references after adding second, got %d", n)
	}

	// Step 3: re-adding an existing reference must not grow the list.
	meta, err = meta.AddReference(refA)
	if err != nil {
		t.Fatalf("Failed to add duplicate reference: %v", err)
	}
	if n := len(meta.References); n != 2 {
		t.Fatalf("Expected 2 references after adding duplicate, got %d", n)
	}

	// Step 4: removing refA must leave exactly refB behind.
	meta, err = meta.RemoveReference(refA)
	if err != nil {
		t.Fatalf("Failed to remove first reference: %v", err)
	}
	if n := len(meta.References); n != 1 {
		t.Fatalf("Expected 1 reference after removing first, got %d", n)
	}
	if head := meta.References[0]; head.Owner != refB.Owner || head.ID != refB.ID {
		t.Fatalf("Wrong reference removed")
	}

	// Step 5: removing a reference that was never added is expected to succeed
	// and leave the list unchanged.
	meta, err = meta.RemoveReference(Reference{Owner: 999, ID: 999})
	if err != nil {
		t.Fatalf("Failed to remove non-existent reference: %v", err)
	}
	if n := len(meta.References); n != 1 {
		t.Fatalf("Expected 1 reference after removing non-existent, got %d", n)
	}

	// Step 6: removing the remaining reference empties the list.
	meta, err = meta.RemoveReference(refB)
	if err != nil {
		t.Fatalf("Failed to remove last reference: %v", err)
	}
	if n := len(meta.References); n != 0 {
		t.Fatalf("Expected 0 references after removing last, got %d", n)
	}
}
// TestEmptyMetadataBytes checks that decoding an empty byte slice with
// BytesToMetadata yields a Metadata whose ID is 0 and whose References list is
// empty.
func TestEmptyMetadataBytes(t *testing.T) {
	decoded := BytesToMetadata([]byte{})
	if decoded.ID == 0 && len(decoded.References) == 0 {
		return
	}
	t.Fatalf("Expected empty metadata, got %+v", decoded)
}
// TestDeduplicationSize checks deduplication by measuring bytes on disk. It
// stores a 100 KiB value once, stores the same value nine more times under
// different references, and requires that the "data" subdirectory of the store
// path grows by no more than 10%. It then stores a different 100 KiB value and
// requires that the directory grows by more than 10%.
func TestDeduplicationSize(t *testing.T) {
	testDir := "/tmp/dedupestor_test_dedup_size"

	// Start from an empty directory. os.RemoveAll returns nil for a missing
	// path, so no existence check is needed; both errors are checked so that a
	// setup problem fails here instead of surfacing later as a confusing
	// store error.
	if err := os.RemoveAll(testDir); err != nil {
		t.Fatalf("Failed to remove test directory %s: %v", testDir, err)
	}
	if err := os.MkdirAll(testDir, 0755); err != nil {
		t.Fatalf("Failed to create test directory %s: %v", testDir, err)
	}

	// Create a new dedupe store
	ds, err := New(NewArgs{
		Path:  testDir,
		Reset: true,
	})
	if err != nil {
		t.Fatalf("Failed to create dedupe store: %v", err)
	}
	defer ds.Close()

	// Build a 100 KiB payload with a repeating 0..255 byte pattern.
	largeData := make([]byte, 100*1024)
	for i := range largeData {
		largeData[i] = byte(i % 256)
	}

	// Store the data with the first reference.
	ref1 := Reference{Owner: 1, ID: 1}
	id1, err := ds.Store(largeData, ref1)
	if err != nil {
		t.Fatalf("Failed to store data with first reference: %v", err)
	}

	// Baseline size of the data directory after the first store.
	// NOTE(review): assumes the store keeps value bytes under <Path>/data —
	// confirm against the store implementation.
	dataDir := filepath.Join(testDir, "data")
	sizeAfterFirst, err := getDirSize(dataDir)
	if err != nil {
		t.Fatalf("Failed to get directory size: %v", err)
	}
	t.Logf("Size after first store: %d bytes", sizeAfterFirst)

	// Store the same data nine more times, each under a different reference.
	for i := 2; i <= 10; i++ {
		ref := Reference{Owner: uint16(i), ID: uint32(i)}
		id, err := ds.Store(largeData, ref)
		if err != nil {
			t.Fatalf("Failed to store data with reference %d: %v", i, err)
		}
		// The same ID must come back every time (deduplication is working).
		if id != id1 {
			t.Fatalf("Expected same ID for duplicate data, got %d and %d", id1, id)
		}
	}

	// Size after storing the same data multiple times.
	sizeAfterMultiple, err := getDirSize(dataDir)
	if err != nil {
		t.Fatalf("Failed to get directory size: %v", err)
	}
	t.Logf("Size after storing same data 10 times: %d bytes", sizeAfterMultiple)

	// The size should be approximately the same (allowing for metadata
	// overhead): fail if it grew by more than 10%.
	if sizeAfterMultiple > sizeAfterFirst*110/100 {
		t.Fatalf("Directory size grew significantly after storing duplicate data: %d -> %d bytes",
			sizeAfterFirst, sizeAfterMultiple)
	}

	// Now store different data: the same length, with the pattern shifted by
	// 128 so the bytes differ from largeData.
	differentData := make([]byte, 100*1024)
	for i := range differentData {
		differentData[i] = byte((i + 128) % 256)
	}
	ref11 := Reference{Owner: 11, ID: 11}
	_, err = ds.Store(differentData, ref11)
	if err != nil {
		t.Fatalf("Failed to store different data: %v", err)
	}

	// Size after storing different data.
	sizeAfterDifferent, err := getDirSize(dataDir)
	if err != nil {
		t.Fatalf("Failed to get directory size: %v", err)
	}
	t.Logf("Size after storing different data: %d bytes", sizeAfterDifferent)

	// New content must take real space: fail unless the directory grew by more
	// than 10%.
	if sizeAfterDifferent <= sizeAfterMultiple*110/100 {
		t.Fatalf("Directory size didn't grow as expected after storing different data: %d -> %d bytes",
			sizeAfterMultiple, sizeAfterDifferent)
	}
}
// getDirSize walks the tree rooted at path and returns the combined size, in
// bytes, of every non-directory entry visited. If the walk stops on an error,
// that error is returned together with the total accumulated up to that point.
func getDirSize(path string) (int64, error) {
	var total int64

	// visit adds the size of each non-directory entry to total and aborts the
	// walk on the first error reported for any entry.
	visit := func(_ string, entry os.FileInfo, walkErr error) error {
		switch {
		case walkErr != nil:
			return walkErr
		case entry.IsDir():
			return nil
		}
		total += entry.Size()
		return nil
	}

	walkErr := filepath.Walk(path, visit)
	return total, walkErr
}