package main import ( "fmt" "math/rand" "os" "sync" "sync/atomic" "time" "igit.com/xbase/raft/db" ) const ( TotalKeys = 100000 UpdateCount = 10000 DeleteCount = 10000 QueryCount = 100000 DataDir = "bench_db_data" ValueBaseSize = 32 // Base size for values ) func main() { // Clean up previous run os.RemoveAll(DataDir) fmt.Printf("Initializing DB Engine in %s...\n", DataDir) e, err := db.NewEngine(DataDir) if err != nil { panic(err) } defer e.Close() // 1. Bulk Insert 100k keys fmt.Println("\n--- Phase 1: Bulk Insert 100k Keys ---") keys := make([]string, TotalKeys) start := time.Now() // Pre-generate keys to avoid benchmark overhead for i := 0; i < TotalKeys; i++ { keys[i] = fmt.Sprintf("bench.key.%d", i) } var wg sync.WaitGroup workers := 10 // Concurrent workers chunkSize := TotalKeys / workers var insertOps int64 for w := 0; w < workers; w++ { wg.Add(1) go func(id int) { defer wg.Done() base := id * chunkSize for i := 0; i < chunkSize; i++ { idx := base + i // Random value string of approx ValueBaseSize // Using consistent size initially to test reuse later val := fmt.Sprintf("value-data-%d-%s", idx, randomString(15)) if err := e.Set(keys[idx], val, uint64(idx)); err != nil { panic(err) } atomic.AddInt64(&insertOps, 1) } }(w) } wg.Wait() duration := time.Since(start) qps := float64(TotalKeys) / duration.Seconds() printStats("Insert", TotalKeys, duration, qps, getFileSize(DataDir+"/values.data")) // 2. Update 10k Keys (Mixed Strategy) // Strategy: // - 50% Longer: Triggers Append + Mark Old Deleted (Old slots become Free) // - 50% Shorter: Triggers In-Place Update (No FreeList change) // But wait, with FreeList, the "Longer" updates will generate Free slots. // Can we verify if subsequent inserts reuse them? fmt.Println("\n--- Phase 2: Update 10k Keys (50% Long, 50% Short) ---") start = time.Now() updateKeys := keys[:UpdateCount] wg = sync.WaitGroup{} chunkSize = UpdateCount / workers for w := 0; w < workers; w++ { wg.Add(1) go func(id int) { defer wg.Done() base := id * chunkSize for i := 0; i < chunkSize; i++ { idx := base + i key := updateKeys[idx] var val string if idx%2 == 0 { // Longer value (Trigger Append, Release Old Slot to FreeList) // Old cap was likely 32 or 48. New cap will be larger. val = fmt.Sprintf("updated-long-value-%d-%s-padding-padding", idx, randomString(40)) } else { // Shorter value (Trigger In-Place) val = "short" } if err := e.Set(key, val, uint64(TotalKeys+idx)); err != nil { panic(err) } } }(w) } wg.Wait() duration = time.Since(start) qps = float64(UpdateCount) / duration.Seconds() printStats("Update", UpdateCount, duration, qps, getFileSize(DataDir+"/values.data")) // 3. Insert New Data to Test Reuse // We generated ~5000 free slots (from the Long updates) in Phase 2. // Let's insert 5000 new keys with size matching the old slots (approx 32-48 bytes). // If reuse works, file size should NOT increase significantly. fmt.Println("\n--- Phase 3: Insert 5k New Keys (Test Reuse) ---") reuseKeysCount := 5000 start = time.Now() for i := 0; i < reuseKeysCount; i++ { key := fmt.Sprintf("reuse.key.%d", i) // Length matches initial inserts (approx 30-40 bytes) val := fmt.Sprintf("new-value-reuse-%d-%s", i, randomString(15)) if err := e.Set(key, val, uint64(TotalKeys+UpdateCount+i)); err != nil { panic(err) } } duration = time.Since(start) qps = float64(reuseKeysCount) / duration.Seconds() printStats("InsertReuse", reuseKeysCount, duration, qps, getFileSize(DataDir+"/values.data")) // 4. Delete 10k Keys fmt.Println("\n--- Phase 4: Delete 10k Keys (Simulated via Update) ---") // Using "DELETED_MARKER" which is short, so it will be In-Place update. // This won't test FreeList populating, but In-Place logic. // To test FreeList populating from Delete, we need a real Delete() or Update to smaller size that frees extra space? // But our storage doesn't split blocks. // So let's stick to standard benchmark. deleteKeys := keys[UpdateCount : UpdateCount+DeleteCount] start = time.Now() wg = sync.WaitGroup{} chunkSize = DeleteCount / workers for w := 0; w < workers; w++ { wg.Add(1) go func(id int) { defer wg.Done() base := id * chunkSize for i := 0; i < chunkSize; i++ { idx := base + i key := deleteKeys[idx] if err := e.Set(key, "DELETED_MARKER", uint64(TotalKeys+UpdateCount+reuseKeysCount+idx)); err != nil { panic(err) } } }(w) } wg.Wait() duration = time.Since(start) qps = float64(DeleteCount) / duration.Seconds() printStats("Delete", DeleteCount, duration, qps, getFileSize(DataDir+"/values.data")) // 5. Query Performance Test fmt.Println("\n--- Phase 5: Query Performance Test ---") // 5.1 Single Key Point Lookup start = time.Now() qPointCount := 100000 wg = sync.WaitGroup{} chunkSize = qPointCount / workers for w := 0; w < workers; w++ { wg.Add(1) go func(id int) { defer wg.Done() base := id * chunkSize for i := 0; i < chunkSize; i++ { // Query: key = "bench.key.12345" idx := base + i key := keys[idx] query := fmt.Sprintf(`key = "%s"`, key) _, _ = e.Query(query) } }(w) } wg.Wait() duration = time.Since(start) qps = float64(qPointCount) / duration.Seconds() printStats("Query(Point)", qPointCount, duration, qps, 0) // 5.2 Query Metadata Only (Key/CommitIndex) - Should be fast (No IO) start = time.Now() qMetaCount := 50000 var metaHits int64 wg = sync.WaitGroup{} chunkSize = qMetaCount / workers for w := 0; w < workers; w++ { wg.Add(1) go func() { defer wg.Done() // Query for keys starting with "bench.key.99" (Last 1000 keys) // This exercises the scan but filters on Key (Metadata) // Note: Our Query implementation scans ALL keys, so total items processed = TotalKeys * qMetaCount / workers // This is extremely heavy if not optimized. // Let's run a smaller number of queries. for i := 0; i < chunkSize; i++ { // Query: key like "bench.key.999*" res, _ := e.Query(`key like "bench.key.999*"`) atomic.AddInt64(&metaHits, int64(len(res))) } }() } wg.Wait() duration = time.Since(start) qps = float64(qMetaCount) / duration.Seconds() printStats("Query(Meta)", qMetaCount, duration, qps, 0) // 5.3 Query with Pagination (LIMIT/OFFSET) // We use the same Meta query but with LIMIT 10 OFFSET 20 to test pagination speedup start = time.Now() qPageCount := 50000 var pageHits int64 wg = sync.WaitGroup{} chunkSize = qPageCount / workers for w := 0; w < workers; w++ { wg.Add(1) go func() { defer wg.Done() for i := 0; i < chunkSize; i++ { // Query with LIMIT 10 OFFSET 20 res, _ := e.Query(`key like "bench.key.999*" LIMIT 10 OFFSET 20`) atomic.AddInt64(&pageHits, int64(len(res))) } }() } wg.Wait() duration = time.Since(start) qps = float64(qPageCount) / duration.Seconds() printStats("Query(Lim10Off20)", qPageCount, duration, qps, 0) // 5.4 Query Value (Needs IO) start = time.Now() qValCount := 100 // Smaller count because full scan IO is slow var valHits int64 wg = sync.WaitGroup{} chunkSize = qValCount / workers if chunkSize == 0 { chunkSize = 1; workers = qValCount } for w := 0; w < workers; w++ { wg.Add(1) go func() { defer wg.Done() for i := 0; i < chunkSize; i++ { // Query: value like "*data-999*" res, _ := e.Query(`value like "*data-999*"`) atomic.AddInt64(&valHits, int64(len(res))) } }() } wg.Wait() duration = time.Since(start) qps = float64(qValCount) / duration.Seconds() printStats("Query(Val)", qValCount, duration, qps, 0) // 6. Verification fmt.Println("\n--- Phase 6: Verification (Full Scan) ---") errors := 0 // 1. Check Updated Keys (0-9999) for i := 0; i < UpdateCount; i++ { val, ok := e.Get(keys[i]) if !ok { fmt.Printf("Error: Key %s not found\n", keys[i]) errors++ continue } if i%2 == 0 { // Should be long if len(val) < 40 { // Our long value is > 40 chars fmt.Printf("Error: Key %s should be long, got: %s\n", keys[i], val) errors++ } } else { // Should be "short" if val != "short" { fmt.Printf("Error: Key %s should be 'short', got: %s\n", keys[i], val) errors++ } } } // 2. Check Deleted Keys (10000-19999) for i := UpdateCount; i < UpdateCount+DeleteCount; i++ { val, ok := e.Get(keys[i]) if !ok { fmt.Printf("Error: Key %s not found\n", keys[i]) errors++ continue } if val != "DELETED_MARKER" { fmt.Printf("Error: Key %s should be deleted, got: %s\n", keys[i], val) errors++ } } // 3. Check Original Keys (20000-99999) for i := UpdateCount + DeleteCount; i < TotalKeys; i++ { val, ok := e.Get(keys[i]) if !ok { fmt.Printf("Error: Key %s not found\n", keys[i]) errors++ continue } // Original values start with "value-data-" if len(val) < 10 || val[:11] != "value-data-" { fmt.Printf("Error: Key %s should be original, got: %s\n", keys[i], val) errors++ } } // 4. Check Reused Keys (Extra 5000) for i := 0; i < 5000; i++ { // reuseKeysCount was hardcoded as 5000 inside main key := fmt.Sprintf("reuse.key.%d", i) val, ok := e.Get(key) if !ok { fmt.Printf("Error: Reuse Key %s not found\n", key) errors++ continue } if len(val) < 10 || val[:16] != "new-value-reuse-" { fmt.Printf("Error: Reuse Key %s mismatch, got: %s\n", key, val) errors++ } } if errors == 0 { fmt.Println("Integrity Check: PASS (All keys verified successfully)") // Cleanup if successful os.RemoveAll(DataDir) } else { fmt.Printf("Integrity Check: FAIL (%d errors found)\n", errors) } } func randomString(n int) string { const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" b := make([]byte, n) for i := range b { b[i] = letters[rand.Intn(len(letters))] } return string(b) } func getFileSize(path string) int64 { fi, err := os.Stat(path) if err != nil { return 0 } return fi.Size() } func printStats(op string, count int, d time.Duration, qps float64, size int64) { sizeStr := "" if size > 0 { sizeStr = fmt.Sprintf(" | DB Size: %.2f MB", float64(size)/1024/1024) } fmt.Printf("%s: %d ops in %v | QPS: %.0f%s\n", op, count, d, qps, sizeStr) }