benchmark.go

package main

import (
    "fmt"
    "math/rand"
    "os"
    "sync"
    "sync/atomic"
    "time"

    "igit.com/xbase/raft/db"
)

const (
    TotalKeys     = 100000
    UpdateCount   = 10000
    DeleteCount   = 10000
    QueryCount    = 100000
    DataDir       = "bench_db_data"
    ValueBaseSize = 32 // Base size for values
)
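
// main drives a multi-phase benchmark against the db engine: bulk insert,
// mixed-size updates, slot-reuse inserts, simulated deletes, several query
// workloads, and a final integrity scan over every key.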
func main() {
    // Remove any data from a previous run.
    os.RemoveAll(DataDir)
    fmt.Printf("Initializing DB Engine in %s...\n", DataDir)
    e, err := db.NewEngine(DataDir)
    if err != nil {
        panic(err)
    }
    defer e.Close()

    // 1. Bulk insert 100k keys across concurrent workers.
    fmt.Println("\n--- Phase 1: Bulk Insert 100k Keys ---")
    keys := make([]string, TotalKeys)
    // Pre-generate keys so key formatting stays out of the measured time.
    for i := 0; i < TotalKeys; i++ {
        keys[i] = fmt.Sprintf("bench.key.%d", i)
    }
    start := time.Now()
    var wg sync.WaitGroup
    workers := 10 // Concurrent workers
    chunkSize := TotalKeys / workers
    var insertOps int64
    for w := 0; w < workers; w++ {
        wg.Add(1)
        go func(id int) {
            defer wg.Done()
            base := id * chunkSize
            for i := 0; i < chunkSize; i++ {
                idx := base + i
                // Random value of roughly ValueBaseSize bytes.
                // Sizes are kept consistent initially so slot reuse can be tested later.
                val := fmt.Sprintf("value-data-%d-%s", idx, randomString(15))
                if err := e.Set(keys[idx], val, uint64(idx)); err != nil {
                    panic(err)
                }
                atomic.AddInt64(&insertOps, 1)
            }
        }(w)
    }
    wg.Wait()
    duration := time.Since(start)
    qps := float64(TotalKeys) / duration.Seconds()
    printStats("Insert", TotalKeys, duration, qps, getFileSize(DataDir+"/values.data"))
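
    // Each write/query phase below uses the same fan-out pattern: the work is
    // split into equal chunks handled by `workers` goroutines, joined via the
    // WaitGroup before the phase timer is stopped.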

    // 2. Update 10k keys with a mixed strategy:
    //   - 50% longer values: triggers append and marks the old slot deleted (old slots go to the FreeList)
    //   - 50% shorter values: triggers an in-place update (no FreeList change)
    // The longer updates populate the FreeList; Phase 3 checks whether later inserts reuse those slots.
    fmt.Println("\n--- Phase 2: Update 10k Keys (50% Long, 50% Short) ---")
    start = time.Now()
    updateKeys := keys[:UpdateCount]
    wg = sync.WaitGroup{}
    chunkSize = UpdateCount / workers
    for w := 0; w < workers; w++ {
        wg.Add(1)
        go func(id int) {
            defer wg.Done()
            base := id * chunkSize
            for i := 0; i < chunkSize; i++ {
                idx := base + i
                key := updateKeys[idx]
                var val string
                if idx%2 == 0 {
                    // Longer value: appended, releasing the old slot to the FreeList.
                    // The old capacity was likely 32 or 48 bytes; the new value needs more.
                    val = fmt.Sprintf("updated-long-value-%d-%s-padding-padding", idx, randomString(40))
                } else {
                    // Shorter value: updated in place.
                    val = "short"
                }
                if err := e.Set(key, val, uint64(TotalKeys+idx)); err != nil {
                    panic(err)
                }
            }
        }(w)
    }
    wg.Wait()
    duration = time.Since(start)
    qps = float64(UpdateCount) / duration.Seconds()
    printStats("Update", UpdateCount, duration, qps, getFileSize(DataDir+"/values.data"))

    // 3. Insert new data to test slot reuse.
    // Phase 2 generated ~5000 free slots (from the long updates).
    // Insert 5000 new keys whose values match the old slot sizes (approx 32-48 bytes).
    // If reuse works, the file size should NOT increase significantly.
    fmt.Println("\n--- Phase 3: Insert 5k New Keys (Test Reuse) ---")
    reuseKeysCount := 5000
    start = time.Now()
    for i := 0; i < reuseKeysCount; i++ {
        key := fmt.Sprintf("reuse.key.%d", i)
        // Length matches the initial inserts (approx 30-40 bytes).
        val := fmt.Sprintf("new-value-reuse-%d-%s", i, randomString(15))
        if err := e.Set(key, val, uint64(TotalKeys+UpdateCount+i)); err != nil {
            panic(err)
        }
    }
    duration = time.Since(start)
    qps = float64(reuseKeysCount) / duration.Seconds()
    printStats("InsertReuse", reuseKeysCount, duration, qps, getFileSize(DataDir+"/values.data"))

    // 4. Delete 10k keys, simulated by overwriting with a marker value.
    fmt.Println("\n--- Phase 4: Delete 10k Keys (Simulated via Update) ---")
    // "DELETED_MARKER" is short, so each delete is an in-place update. This
    // exercises the in-place path rather than FreeList population; the storage
    // does not split blocks, so shrinking a value does not free extra space.
    deleteKeys := keys[UpdateCount : UpdateCount+DeleteCount]
    start = time.Now()
    wg = sync.WaitGroup{}
    chunkSize = DeleteCount / workers
    for w := 0; w < workers; w++ {
        wg.Add(1)
        go func(id int) {
            defer wg.Done()
            base := id * chunkSize
            for i := 0; i < chunkSize; i++ {
                idx := base + i
                key := deleteKeys[idx]
                if err := e.Set(key, "DELETED_MARKER", uint64(TotalKeys+UpdateCount+reuseKeysCount+idx)); err != nil {
                    panic(err)
                }
            }
        }(w)
    }
    wg.Wait()
    duration = time.Since(start)
    qps = float64(DeleteCount) / duration.Seconds()
    printStats("Delete", DeleteCount, duration, qps, getFileSize(DataDir+"/values.data"))

    // 5. Query Performance Test
    fmt.Println("\n--- Phase 5: Query Performance Test ---")
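    // Four workloads follow: point lookups by exact key, a metadata-only
    // wildcard filter, the same filter with LIMIT 10, and a wildcard filter
    // on values, which must read value data from disk.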

    // 5.1 Single-key point lookups.
    start = time.Now()
    qPointCount := 100000
    wg = sync.WaitGroup{}
    chunkSize = qPointCount / workers
    for w := 0; w < workers; w++ {
        wg.Add(1)
        go func(id int) {
            defer wg.Done()
            base := id * chunkSize
            for i := 0; i < chunkSize; i++ {
                // Example query: key = "bench.key.12345"
                idx := base + i
                key := keys[idx]
                query := fmt.Sprintf(`key = "%s"`, key)
                _, _ = e.Query(query)
            }
        }(w)
    }
    wg.Wait()
    duration = time.Since(start)
    qps = float64(qPointCount) / duration.Seconds()
    printStats("Query(Point)", qPointCount, duration, qps, 0)

    // 5.2 Query metadata only (key/commit index) - should be fast, no value IO.
    start = time.Now()
    qMetaCount := 50000
    var metaHits int64
    wg = sync.WaitGroup{}
    chunkSize = qMetaCount / workers
    for w := 0; w < workers; w++ {
        wg.Add(1)
        go func() {
            defer wg.Done()
            // Filter on keys matching "bench.key.999*". The predicate touches only
            // metadata (the key), but the Query implementation scans every key, so
            // each query processes TotalKeys entries - roughly TotalKeys*qMetaCount
            // scans in total. Keep the query count modest accordingly.
            for i := 0; i < chunkSize; i++ {
                res, _ := e.Query(`key like "bench.key.999*"`)
                atomic.AddInt64(&metaHits, int64(len(res)))
            }
        }()
    }
    wg.Wait()
    duration = time.Since(start)
    qps = float64(qMetaCount) / duration.Seconds()
    printStats("Query(Meta)", qMetaCount, duration, qps, 0)

    // 5.3 Query with pagination (LIMIT/OFFSET).
    // Reuse the metadata query with LIMIT 10 to test any pagination speedup.
    start = time.Now()
    qPageCount := 50000
    var pageHits int64
    wg = sync.WaitGroup{}
    chunkSize = qPageCount / workers
    for w := 0; w < workers; w++ {
        wg.Add(1)
        go func() {
            defer wg.Done()
            for i := 0; i < chunkSize; i++ {
                // Note: the engine currently scans fully and then truncates; pushing
                // the limit down would help, but sharding makes that hard.
                res, _ := e.Query(`key like "bench.key.999*" LIMIT 10`)
                atomic.AddInt64(&pageHits, int64(len(res)))
            }
        }()
    }
    wg.Wait()
    duration = time.Since(start)
    qps = float64(qPageCount) / duration.Seconds()
    printStats("Query(Limit10)", qPageCount, duration, qps, 0)

    // 5.4 Query values (requires IO).
    start = time.Now()
    qValCount := 100 // Smaller count because a full value scan is slow
    var valHits int64
    wg = sync.WaitGroup{}
    chunkSize = qValCount / workers
    if chunkSize == 0 {
        chunkSize = 1
        workers = qValCount
    }
    for w := 0; w < workers; w++ {
        wg.Add(1)
        go func() {
            defer wg.Done()
            for i := 0; i < chunkSize; i++ {
                // Example query: value like "*data-999*"
                res, _ := e.Query(`value like "*data-999*"`)
                atomic.AddInt64(&valHits, int64(len(res)))
            }
        }()
    }
    wg.Wait()
    duration = time.Since(start)
    qps = float64(qValCount) / duration.Seconds()
    printStats("Query(Val)", qValCount, duration, qps, 0)

    // 6. Verification
    fmt.Println("\n--- Phase 6: Verification (Full Scan) ---")
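    // The checks below cover: updated keys (long vs. short), simulated deletes,
    // untouched original keys, and the reuse keys inserted in Phase 3.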
    errors := 0
    // 1. Check updated keys (0-9999).
    for i := 0; i < UpdateCount; i++ {
        val, ok := e.Get(keys[i])
        if !ok {
            fmt.Printf("Error: Key %s not found\n", keys[i])
            errors++
            continue
        }
        if i%2 == 0 {
            // Should be the long value (> 40 chars).
            if len(val) < 40 {
                fmt.Printf("Error: Key %s should be long, got: %s\n", keys[i], val)
                errors++
            }
        } else {
            // Should be "short".
            if val != "short" {
                fmt.Printf("Error: Key %s should be 'short', got: %s\n", keys[i], val)
                errors++
            }
        }
    }
    // 2. Check deleted keys (10000-19999).
    for i := UpdateCount; i < UpdateCount+DeleteCount; i++ {
        val, ok := e.Get(keys[i])
        if !ok {
            fmt.Printf("Error: Key %s not found\n", keys[i])
            errors++
            continue
        }
        if val != "DELETED_MARKER" {
            fmt.Printf("Error: Key %s should be deleted, got: %s\n", keys[i], val)
            errors++
        }
    }
    // 3. Check original keys (20000-99999); their values start with "value-data-".
    for i := UpdateCount + DeleteCount; i < TotalKeys; i++ {
        val, ok := e.Get(keys[i])
        if !ok {
            fmt.Printf("Error: Key %s not found\n", keys[i])
            errors++
            continue
        }
        if len(val) < 11 || val[:11] != "value-data-" {
            fmt.Printf("Error: Key %s should be original, got: %s\n", keys[i], val)
            errors++
        }
    }
    // 4. Check the extra reuse keys inserted in Phase 3.
    for i := 0; i < reuseKeysCount; i++ {
        key := fmt.Sprintf("reuse.key.%d", i)
        val, ok := e.Get(key)
        if !ok {
            fmt.Printf("Error: Reuse Key %s not found\n", key)
            errors++
            continue
        }
        if len(val) < 16 || val[:16] != "new-value-reuse-" {
            fmt.Printf("Error: Reuse Key %s mismatch, got: %s\n", key, val)
            errors++
        }
    }
    if errors == 0 {
        fmt.Println("Integrity Check: PASS (All keys verified successfully)")
    } else {
        fmt.Printf("Integrity Check: FAIL (%d errors found)\n", errors)
    }
}
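
// randomString returns a pseudo-random ASCII letter string of length n,
// used to pad benchmark values to the desired size.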
func randomString(n int) string {
    const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    b := make([]byte, n)
    for i := range b {
        b[i] = letters[rand.Intn(len(letters))]
    }
    return string(b)
}
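
// getFileSize returns the size of the file at path in bytes, or 0 if the
// file cannot be stat'ed.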
func getFileSize(path string) int64 {
    fi, err := os.Stat(path)
    if err != nil {
        return 0
    }
    return fi.Size()
}
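
// printStats prints one summary line for a benchmark phase: operation name,
// op count, elapsed time, QPS, and (when size > 0) the data file size in MB.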
func printStats(op string, count int, d time.Duration, qps float64, size int64) {
    sizeStr := ""
    if size > 0 {
        sizeStr = fmt.Sprintf(" | DB Size: %.2f MB", float64(size)/1024/1024)
    }
    fmt.Printf("%s: %d ops in %v | QPS: %.0f%s\n", op, count, d, qps, sizeStr)
}