package main

import (
	"fmt"
	"math/rand"
	"os"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/xbase/raft/db"
)

const (
	TotalKeys     = 100000
	UpdateCount   = 10000
	DeleteCount   = 10000
	QueryCount    = 100000
	ReuseCount    = 5000 // new keys inserted in Phase 3 to exercise slot reuse
	DataDir       = "bench_db_data"
	ValueBaseSize = 32 // approximate base size for generated values
)
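
// The db package itself is not shown here. From the calls below, this
// benchmark assumes roughly the following surface (an inferred sketch,
// not the package's actual definition); the third argument to Set is
// treated as a monotonically increasing sequence/version number:
//
//	func NewEngine(dir string) (*Engine, error)
//	func (e *Engine) Set(key, value string, seq uint64) error
//	func (e *Engine) Get(key string) (string, bool)
//	func (e *Engine) Close() error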

func main() {
	// Clean up data from any previous run.
	os.RemoveAll(DataDir)

	fmt.Printf("Initializing DB Engine in %s...\n", DataDir)
	e, err := db.NewEngine(DataDir)
	if err != nil {
		panic(err)
	}
	defer e.Close()

	// Phase 1: bulk insert 100k keys with 10 concurrent writers.
	fmt.Println("\n--- Phase 1: Bulk Insert 100k Keys ---")
	keys := make([]string, TotalKeys)

	// Pre-generate keys so that key formatting is not part of the
	// measured insert time (the timer starts after this loop).
	for i := 0; i < TotalKeys; i++ {
		keys[i] = fmt.Sprintf("bench.key.%d", i)
	}

	start := time.Now()

	var wg sync.WaitGroup
	workers := 10 // concurrent writer goroutines
	chunkSize := TotalKeys / workers
	var insertOps int64
	for w := 0; w < workers; w++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			base := id * chunkSize
			for i := 0; i < chunkSize; i++ {
				idx := base + i
				// Values of roughly ValueBaseSize bytes; keeping the
				// initial sizes consistent makes slot reuse testable later.
				val := fmt.Sprintf("value-data-%d-%s", idx, randomString(15))
				if err := e.Set(keys[idx], val, uint64(idx)); err != nil {
					panic(err)
				}
				atomic.AddInt64(&insertOps, 1)
			}
		}(w)
	}
	wg.Wait()

	// wg.Wait() synchronizes the workers, so insertOps can be read directly.
	duration := time.Since(start)
	qps := float64(insertOps) / duration.Seconds()
	printStats("Insert", int(insertOps), duration, qps, getFileSize(DataDir+"/values.data"))

	// Phase 2: update 10k keys with a mixed strategy:
	//   - 50% longer values: each forces an append, and the old slot is
	//     marked deleted and returned to the free list
	//   - 50% shorter values: updated in place, leaving the free list
	//     untouched
	// Phase 3 then checks whether subsequent inserts reuse those freed slots.
	fmt.Println("\n--- Phase 2: Update 10k Keys (50% Long, 50% Short) ---")
	start = time.Now()

	// The WaitGroup is reusable once Wait has returned; reassigning it
	// would copy its internal lock and trip go vet.
	updateKeys := keys[:UpdateCount]
	chunkSize = UpdateCount / workers
	for w := 0; w < workers; w++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			base := id * chunkSize
			for i := 0; i < chunkSize; i++ {
				idx := base + i
				key := updateKeys[idx]
				var val string
				if idx%2 == 0 {
					// Longer value: forces an append and releases the old
					// slot (previously ~32-48 bytes) to the free list.
					val = fmt.Sprintf("updated-long-value-%d-%s-padding-padding", idx, randomString(40))
				} else {
					// Shorter value: updated in place.
					val = "short"
				}
				if err := e.Set(key, val, uint64(TotalKeys+idx)); err != nil {
					panic(err)
				}
			}
		}(w)
	}
	wg.Wait()

	duration = time.Since(start)
	qps = float64(UpdateCount) / duration.Seconds()
	printStats("Update", UpdateCount, duration, qps, getFileSize(DataDir+"/values.data"))
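
	// The long updates above should have released roughly UpdateCount/2
	// (~5000) slots to the free list. Record the value-file size now so
	// Phase 3 can measure actual growth; near-zero growth would suggest
	// the freed slots are being reused. (Whether the engine actually
	// reuses freed slots is the behavior under test, not a given.)
	sizeAfterUpdates := getFileSize(DataDir + "/values.data")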

	// Phase 3: insert ReuseCount new keys sized like the original values
	// (~30-40 bytes). If free-list reuse works, these inserts should fill
	// the slots freed in Phase 2 instead of growing the file significantly.
	fmt.Println("\n--- Phase 3: Insert 5k New Keys (Test Reuse) ---")
	start = time.Now()

	for i := 0; i < ReuseCount; i++ {
		key := fmt.Sprintf("reuse.key.%d", i)
		// Sized to match the initial inserts (approx. 30-40 bytes).
		val := fmt.Sprintf("new-value-reuse-%d-%s", i, randomString(15))
		if err := e.Set(key, val, uint64(TotalKeys+UpdateCount+i)); err != nil {
			panic(err)
		}
	}

	duration = time.Since(start)
	qps = float64(ReuseCount) / duration.Seconds()
	printStats("InsertReuse", ReuseCount, duration, qps, getFileSize(DataDir+"/values.data"))
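
	// File growth across the reuse phase (measured against the size
	// recorded after Phase 2); with working free-list reuse this should
	// be close to zero.
	growth := getFileSize(DataDir+"/values.data") - sizeAfterUpdates
	fmt.Printf("Value file growth during reuse phase: %.2f KB\n", float64(growth)/1024)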

	// Phase 4: "delete" 10k keys. Deletion is simulated by overwriting
	// each key with a short marker value, which exercises the in-place
	// update path rather than free-list population: the storage format
	// does not split blocks, so shrinking a value in place cannot return
	// its surplus bytes to the free list.
	fmt.Println("\n--- Phase 4: Delete 10k Keys (Simulated via Update) ---")
	deleteKeys := keys[UpdateCount : UpdateCount+DeleteCount]
	start = time.Now()

	chunkSize = DeleteCount / workers
	for w := 0; w < workers; w++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			base := id * chunkSize
			for i := 0; i < chunkSize; i++ {
				idx := base + i
				key := deleteKeys[idx]
				if err := e.Set(key, "DELETED_MARKER", uint64(TotalKeys+UpdateCount+ReuseCount+idx)); err != nil {
					panic(err)
				}
			}
		}(w)
	}
	wg.Wait()

	duration = time.Since(start)
	qps = float64(DeleteCount) / duration.Seconds()
	printStats("Delete", DeleteCount, duration, qps, getFileSize(DataDir+"/values.data"))
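
	// A real tombstone-style delete would let freed slots go straight to
	// the free list. The engine does not expose one here; a hypothetical
	// shape, if it were added (not part of the current db package):
	//
	//	if err := e.Delete(key, seq); err != nil {
	//		panic(err)
	//	}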

	// Phase 5: verify every key with a full read-back scan.
	fmt.Println("\n--- Phase 5: Verification (Full Scan) ---")

	errors := 0

	// 1. Check updated keys (indices 0-9999).
	for i := 0; i < UpdateCount; i++ {
		val, ok := e.Get(keys[i])
		if !ok {
			fmt.Printf("Error: Key %s not found\n", keys[i])
			errors++
			continue
		}
		if i%2 == 0 {
			// Even indices got the long value, which is well over 40 chars.
			if len(val) < 40 {
				fmt.Printf("Error: Key %s should be long, got: %s\n", keys[i], val)
				errors++
			}
		} else {
			// Odd indices were updated in place to "short".
			if val != "short" {
				fmt.Printf("Error: Key %s should be 'short', got: %s\n", keys[i], val)
				errors++
			}
		}
	}

	// 2. Check "deleted" keys (indices 10000-19999).
	for i := UpdateCount; i < UpdateCount+DeleteCount; i++ {
		val, ok := e.Get(keys[i])
		if !ok {
			fmt.Printf("Error: Key %s not found\n", keys[i])
			errors++
			continue
		}
		if val != "DELETED_MARKER" {
			fmt.Printf("Error: Key %s should be deleted, got: %s\n", keys[i], val)
			errors++
		}
	}

	// 3. Check untouched original keys (indices 20000-99999); their
	// values still start with "value-data-".
	for i := UpdateCount + DeleteCount; i < TotalKeys; i++ {
		val, ok := e.Get(keys[i])
		if !ok {
			fmt.Printf("Error: Key %s not found\n", keys[i])
			errors++
			continue
		}
		if !strings.HasPrefix(val, "value-data-") {
			fmt.Printf("Error: Key %s should be original, got: %s\n", keys[i], val)
			errors++
		}
	}

	// 4. Check the ReuseCount keys inserted in Phase 3.
	for i := 0; i < ReuseCount; i++ {
		key := fmt.Sprintf("reuse.key.%d", i)
		val, ok := e.Get(key)
		if !ok {
			fmt.Printf("Error: Reuse Key %s not found\n", key)
			errors++
			continue
		}
		if !strings.HasPrefix(val, "new-value-reuse-") {
			fmt.Printf("Error: Reuse Key %s mismatch, got: %s\n", key, val)
			errors++
		}
	}

	if errors == 0 {
		fmt.Println("Integrity Check: PASS (All keys verified successfully)")
	} else {
		fmt.Printf("Integrity Check: FAIL (%d errors found)\n", errors)
	}
}

// randomString returns n random ASCII letters. The top-level math/rand
// functions are safe for concurrent use, so the worker goroutines can
// share them.
func randomString(n int) string {
	const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
	b := make([]byte, n)
	for i := range b {
		b[i] = letters[rand.Intn(len(letters))]
	}
	return string(b)
}
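
// For reproducible runs, a locally seeded generator could be used instead
// of the package-level source (a sketch; not wired into the benchmark as
// written):
//
//	rng := rand.New(rand.NewSource(42))
//	c := letters[rng.Intn(len(letters))]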

// getFileSize returns the size of the file at path, or 0 if it cannot be
// stat'ed (e.g. before the engine has created it).
func getFileSize(path string) int64 {
	fi, err := os.Stat(path)
	if err != nil {
		return 0
	}
	return fi.Size()
}

// printStats reports one benchmark phase: operation count, wall time,
// throughput, and (when available) the on-disk size of the value file.
func printStats(op string, count int, d time.Duration, qps float64, size int64) {
	sizeStr := ""
	if size > 0 {
		sizeStr = fmt.Sprintf(" | DB Size: %.2f MB", float64(size)/1024/1024)
	}
	fmt.Printf("%s: %d ops in %v | QPS: %.0f%s\n", op, count, d, qps, sizeStr)
}