benchmark.go

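// benchmark.go is a standalone benchmark for the db.Engine key/value store:
// it bulk-inserts 100k keys, applies mixed long/short updates, checks free-slot
// reuse, simulates deletes via marker updates, measures query throughput, and
// ends with a full-scan integrity check. It is assumed to be run (e.g. with
// `go run benchmark.go`) from inside a module that provides igit.com/xbase/raft/db.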
package main

import (
	"fmt"
	"math/rand"
	"os"
	"sync"
	"sync/atomic"
	"time"

	"igit.com/xbase/raft/db"
)

const (
	TotalKeys     = 100000
	UpdateCount   = 10000
	DeleteCount   = 10000
	QueryCount    = 100000
	DataDir       = "bench_db_data"
	ValueBaseSize = 32 // Base size for values
)
func main() {
	// Clean up previous run.
	os.RemoveAll(DataDir)
	fmt.Printf("Initializing DB Engine in %s...\n", DataDir)
	e, err := db.NewEngine(DataDir)
	if err != nil {
		panic(err)
	}
	defer e.Close()

	// 1. Bulk insert 100k keys.
	fmt.Println("\n--- Phase 1: Bulk Insert 100k Keys ---")
	keys := make([]string, TotalKeys)
	start := time.Now()
	// Pre-generate keys to keep key formatting out of the measured work.
	for i := 0; i < TotalKeys; i++ {
		keys[i] = fmt.Sprintf("bench.key.%d", i)
	}
	var wg sync.WaitGroup
	workers := 10 // concurrent workers
	chunkSize := TotalKeys / workers
	var insertOps int64
	for w := 0; w < workers; w++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			base := id * chunkSize
			for i := 0; i < chunkSize; i++ {
				idx := base + i
				// Random value string of approx ValueBaseSize.
				// A consistent initial size makes slot reuse easier to observe later.
				val := fmt.Sprintf("value-data-%d-%s", idx, randomString(15))
				if err := e.Set(keys[idx], val, uint64(idx)); err != nil {
					panic(err)
				}
				atomic.AddInt64(&insertOps, 1)
			}
		}(w)
	}
	wg.Wait()
	duration := time.Since(start)
	qps := float64(TotalKeys) / duration.Seconds()
	printStats("Insert", TotalKeys, duration, qps, getFileSize(DataDir+"/values.data"))

	// 2. Update 10k keys (mixed strategy):
	//   - 50% longer values: triggers append + marks the old slot deleted (old slot goes on the FreeList)
	//   - 50% shorter values: triggers an in-place update (no FreeList change)
	// The longer updates therefore populate the FreeList; Phase 3 then checks
	// whether subsequent inserts actually reuse those freed slots.
	fmt.Println("\n--- Phase 2: Update 10k Keys (50% Long, 50% Short) ---")
	start = time.Now()
	updateKeys := keys[:UpdateCount]
	wg = sync.WaitGroup{}
	chunkSize = UpdateCount / workers
	for w := 0; w < workers; w++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			base := id * chunkSize
			for i := 0; i < chunkSize; i++ {
				idx := base + i
				key := updateKeys[idx]
				var val string
				if idx%2 == 0 {
					// Longer value: triggers append and releases the old slot to the FreeList.
					// The old capacity was likely 32 or 48 bytes; the new value needs more.
					val = fmt.Sprintf("updated-long-value-%d-%s-padding-padding", idx, randomString(40))
				} else {
					// Shorter value: triggers an in-place update.
					val = "short"
				}
				if err := e.Set(key, val, uint64(TotalKeys+idx)); err != nil {
					panic(err)
				}
			}
		}(w)
	}
	wg.Wait()
	duration = time.Since(start)
	qps = float64(UpdateCount) / duration.Seconds()
	printStats("Update", UpdateCount, duration, qps, getFileSize(DataDir+"/values.data"))

	// 3. Insert new data to test reuse.
	// Phase 2 generated ~5000 free slots (from the long updates).
	// Insert 5000 new keys whose size matches the old slots (approx 32-48 bytes).
	// If reuse works, the file size should NOT grow significantly.
	fmt.Println("\n--- Phase 3: Insert 5k New Keys (Test Reuse) ---")
	reuseKeysCount := 5000
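	// Record the size of values.data before this phase so the growth can be
	// reported afterwards. This assumes, like the getFileSize calls above, that
	// values.data is the file that grows on Set.
	sizeBeforeReuse := getFileSize(DataDir + "/values.data")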
	start = time.Now()
	for i := 0; i < reuseKeysCount; i++ {
		key := fmt.Sprintf("reuse.key.%d", i)
		// Length matches the initial inserts (approx 30-40 bytes).
		val := fmt.Sprintf("new-value-reuse-%d-%s", i, randomString(15))
		if err := e.Set(key, val, uint64(TotalKeys+UpdateCount+i)); err != nil {
			panic(err)
		}
	}
	duration = time.Since(start)
	qps = float64(reuseKeysCount) / duration.Seconds()
	printStats("InsertReuse", reuseKeysCount, duration, qps, getFileSize(DataDir+"/values.data"))
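	// If free-slot reuse works as described above, this growth should stay close to zero.
	fmt.Printf("values.data growth during reuse phase: %.2f KB\n",
		float64(getFileSize(DataDir+"/values.data")-sizeBeforeReuse)/1024)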

	// 4. Delete 10k keys, simulated by overwriting with a marker value.
	fmt.Println("\n--- Phase 4: Delete 10k Keys (Simulated via Update) ---")
	// "DELETED_MARKER" is shorter than the existing values, so each write is an
	// in-place update. This exercises the in-place path rather than FreeList
	// population; testing the latter would require a real Delete() or an update
	// that splits off the freed tail of a slot, which this storage format does not do.
	deleteKeys := keys[UpdateCount : UpdateCount+DeleteCount]
	start = time.Now()
	wg = sync.WaitGroup{}
	chunkSize = DeleteCount / workers
	for w := 0; w < workers; w++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			base := id * chunkSize
			for i := 0; i < chunkSize; i++ {
				idx := base + i
				key := deleteKeys[idx]
				if err := e.Set(key, "DELETED_MARKER", uint64(TotalKeys+UpdateCount+reuseKeysCount+idx)); err != nil {
					panic(err)
				}
			}
		}(w)
	}
	wg.Wait()
	duration = time.Since(start)
	qps = float64(DeleteCount) / duration.Seconds()
	printStats("Delete", DeleteCount, duration, qps, getFileSize(DataDir+"/values.data"))

	// 5. Query performance test.
	fmt.Println("\n--- Phase 5: Query Performance Test ---")

	// 5.1 Query metadata only (key/commit index) - should be fast (no value IO).
	// Each query filters on the key prefix "bench.key.999", so it only touches
	// metadata, but the Query implementation still scans ALL keys. Total items
	// scanned is therefore roughly TotalKeys * qMetaCount, which is heavy, so
	// the query count is kept modest.
	start = time.Now()
	qMetaCount := 5000
	var metaHits int64
	wg = sync.WaitGroup{}
	chunkSize = qMetaCount / workers
	for w := 0; w < workers; w++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for i := 0; i < chunkSize; i++ {
				// Query: key like "bench.key.999*"
				res, _ := e.Query(`key like "bench.key.999*"`)
				atomic.AddInt64(&metaHits, int64(len(res)))
			}
		}()
	}
	wg.Wait()
	duration = time.Since(start)
	qps = float64(qMetaCount) / duration.Seconds()
	printStats("Query(Meta)", qMetaCount, duration, qps, 0)
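	// Report the accumulated match count so the result of the metadata queries
	// is visible (each query should match the keys with prefix "bench.key.999").
	fmt.Printf("Query(Meta) total rows matched: %d\n", atomic.LoadInt64(&metaHits))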

	// 5.2 Query on value content (needs value IO).
	start = time.Now()
	qValCount := 100 // smaller count because each full scan reads values from disk
	var valHits int64
	wg = sync.WaitGroup{}
	chunkSize = qValCount / workers
	if chunkSize == 0 {
		chunkSize = 1
		workers = qValCount
	}
	for w := 0; w < workers; w++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for i := 0; i < chunkSize; i++ {
				// Query: value like "*data-999*"
				res, _ := e.Query(`value like "*data-999*"`)
				atomic.AddInt64(&valHits, int64(len(res)))
			}
		}()
	}
	wg.Wait()
	duration = time.Since(start)
	qps = float64(qValCount) / duration.Seconds()
	printStats("Query(Val)", qValCount, duration, qps, 0)
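	// Same sanity report for the value queries.
	fmt.Printf("Query(Val) total rows matched: %d\n", atomic.LoadInt64(&valHits))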

	// 6. Verification.
	fmt.Println("\n--- Phase 6: Verification (Full Scan) ---")
	errors := 0
	// 1. Check updated keys (0-9999).
	for i := 0; i < UpdateCount; i++ {
		val, ok := e.Get(keys[i])
		if !ok {
			fmt.Printf("Error: Key %s not found\n", keys[i])
			errors++
			continue
		}
		if i%2 == 0 {
			// Should be the long value (> 40 chars).
			if len(val) < 40 {
				fmt.Printf("Error: Key %s should be long, got: %s\n", keys[i], val)
				errors++
			}
		} else {
			// Should be "short".
			if val != "short" {
				fmt.Printf("Error: Key %s should be 'short', got: %s\n", keys[i], val)
				errors++
			}
		}
	}
	// 2. Check deleted keys (10000-19999).
	for i := UpdateCount; i < UpdateCount+DeleteCount; i++ {
		val, ok := e.Get(keys[i])
		if !ok {
			fmt.Printf("Error: Key %s not found\n", keys[i])
			errors++
			continue
		}
		if val != "DELETED_MARKER" {
			fmt.Printf("Error: Key %s should be deleted, got: %s\n", keys[i], val)
			errors++
		}
	}
	// 3. Check original keys (20000-99999).
	for i := UpdateCount + DeleteCount; i < TotalKeys; i++ {
		val, ok := e.Get(keys[i])
		if !ok {
			fmt.Printf("Error: Key %s not found\n", keys[i])
			errors++
			continue
		}
		// Original values start with "value-data-" (11 bytes), so guard the slice
		// against shorter values before comparing the prefix.
		if len(val) < 11 || val[:11] != "value-data-" {
			fmt.Printf("Error: Key %s should be original, got: %s\n", keys[i], val)
			errors++
		}
	}
	// 4. Check the reused keys inserted in Phase 3.
	for i := 0; i < reuseKeysCount; i++ {
		key := fmt.Sprintf("reuse.key.%d", i)
		val, ok := e.Get(key)
		if !ok {
			fmt.Printf("Error: Reuse Key %s not found\n", key)
			errors++
			continue
		}
		// Reuse values start with "new-value-reuse-" (16 bytes).
		if len(val) < 16 || val[:16] != "new-value-reuse-" {
			fmt.Printf("Error: Reuse Key %s mismatch, got: %s\n", key, val)
			errors++
		}
	}
	if errors == 0 {
		fmt.Println("Integrity Check: PASS (All keys verified successfully)")
	} else {
		fmt.Printf("Integrity Check: FAIL (%d errors found)\n", errors)
	}
}
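
// randomString returns an n-byte string of random ASCII letters drawn from the
// process-global math/rand source; it only pads benchmark values and is not
// meant to be cryptographically secure.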
func randomString(n int) string {
	const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
	b := make([]byte, n)
	for i := range b {
		b[i] = letters[rand.Intn(len(letters))]
	}
	return string(b)
}
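
// getFileSize returns the size of the file at path in bytes, or 0 if the file
// cannot be stat'ed (e.g. it has not been created yet).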
func getFileSize(path string) int64 {
	fi, err := os.Stat(path)
	if err != nil {
		return 0
	}
	return fi.Size()
}
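
// printStats prints one summary line for a benchmark phase: operation count,
// wall-clock duration, throughput in ops/sec, and (when size > 0) the current
// data-file size in MB.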
func printStats(op string, count int, d time.Duration, qps float64, size int64) {
	sizeStr := ""
	if size > 0 {
		sizeStr = fmt.Sprintf(" | DB Size: %.2f MB", float64(size)/1024/1024)
	}
	fmt.Printf("%s: %d ops in %v | QPS: %.0f%s\n", op, count, d, qps, sizeStr)
}