benchmark.go

package main

import (
    "fmt"
    "math/rand"
    "os"
    "sync"
    "sync/atomic"
    "time"

    "igit.com/xbase/raft/db"
)

const (
    TotalKeys     = 100000
    UpdateCount   = 10000
    DeleteCount   = 10000
    QueryCount    = 100000
    DataDir       = "bench_db_data"
    ValueBaseSize = 32 // Base size for values
)
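
// main drives a multi-phase benchmark against the db engine: bulk insert,
// mixed-size updates, slot-reuse inserts, simulated deletes, several query
// workloads, and a final integrity scan over every key.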
func main() {
    // Remove any data from a previous run.
    os.RemoveAll(DataDir)
    fmt.Printf("Initializing DB Engine in %s...\n", DataDir)
    e, err := db.NewEngine(DataDir)
    if err != nil {
        panic(err)
    }
    defer e.Close()

    // 1. Bulk insert 100k keys across concurrent workers.
    fmt.Println("\n--- Phase 1: Bulk Insert 100k Keys ---")
    keys := make([]string, TotalKeys)
    // Pre-generate keys so key formatting stays out of the measured time.
    for i := 0; i < TotalKeys; i++ {
        keys[i] = fmt.Sprintf("bench.key.%d", i)
    }
    start := time.Now()
    var wg sync.WaitGroup
    workers := 10 // Concurrent workers
    chunkSize := TotalKeys / workers
    var insertOps int64
    for w := 0; w < workers; w++ {
        wg.Add(1)
        go func(id int) {
            defer wg.Done()
            base := id * chunkSize
            for i := 0; i < chunkSize; i++ {
                idx := base + i
                // Random value of roughly ValueBaseSize bytes.
                // Sizes are kept consistent initially so slot reuse can be tested later.
                val := fmt.Sprintf("value-data-%d-%s", idx, randomString(15))
                if err := e.Set(keys[idx], val, uint64(idx)); err != nil {
                    panic(err)
                }
                atomic.AddInt64(&insertOps, 1)
            }
        }(w)
    }
    wg.Wait()
    duration := time.Since(start)
    qps := float64(TotalKeys) / duration.Seconds()
    printStats("Insert", TotalKeys, duration, qps, getFileSize(DataDir+"/values.data"))
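
    // Each write/query phase below uses the same fan-out pattern: the work is
    // split into equal chunks handled by `workers` goroutines, joined via the
    // WaitGroup before the phase timer is stopped.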

    // 2. Update 10k keys with a mixed strategy:
    //   - 50% longer values: triggers append and marks the old slot deleted (old slots go to the FreeList)
    //   - 50% shorter values: triggers an in-place update (no FreeList change)
    // The longer updates populate the FreeList; Phase 3 checks whether later inserts reuse those slots.
    fmt.Println("\n--- Phase 2: Update 10k Keys (50% Long, 50% Short) ---")
    start = time.Now()
    updateKeys := keys[:UpdateCount]
    wg = sync.WaitGroup{}
    chunkSize = UpdateCount / workers
    for w := 0; w < workers; w++ {
        wg.Add(1)
        go func(id int) {
            defer wg.Done()
            base := id * chunkSize
            for i := 0; i < chunkSize; i++ {
                idx := base + i
                key := updateKeys[idx]
                var val string
                if idx%2 == 0 {
                    // Longer value: appended, releasing the old slot to the FreeList.
                    // The old capacity was likely 32 or 48 bytes; the new value needs more.
                    val = fmt.Sprintf("updated-long-value-%d-%s-padding-padding", idx, randomString(40))
                } else {
                    // Shorter value: updated in place.
                    val = "short"
                }
                if err := e.Set(key, val, uint64(TotalKeys+idx)); err != nil {
                    panic(err)
                }
            }
        }(w)
    }
    wg.Wait()
    duration = time.Since(start)
    qps = float64(UpdateCount) / duration.Seconds()
    printStats("Update", UpdateCount, duration, qps, getFileSize(DataDir+"/values.data"))

    // 3. Insert new data to test slot reuse.
    // Phase 2 generated ~5000 free slots (from the long updates).
    // Insert 5000 new keys whose values match the old slot sizes (approx 32-48 bytes).
    // If reuse works, the file size should NOT increase significantly.
    fmt.Println("\n--- Phase 3: Insert 5k New Keys (Test Reuse) ---")
    reuseKeysCount := 5000
    start = time.Now()
    for i := 0; i < reuseKeysCount; i++ {
        key := fmt.Sprintf("reuse.key.%d", i)
        // Length matches the initial inserts (approx 30-40 bytes).
        val := fmt.Sprintf("new-value-reuse-%d-%s", i, randomString(15))
        if err := e.Set(key, val, uint64(TotalKeys+UpdateCount+i)); err != nil {
            panic(err)
        }
    }
    duration = time.Since(start)
    qps = float64(reuseKeysCount) / duration.Seconds()
    printStats("InsertReuse", reuseKeysCount, duration, qps, getFileSize(DataDir+"/values.data"))

    // 4. Delete 10k keys, simulated by overwriting with a marker value.
    fmt.Println("\n--- Phase 4: Delete 10k Keys (Simulated via Update) ---")
    // "DELETED_MARKER" is short, so each delete is an in-place update. This
    // exercises the in-place path rather than FreeList population; the storage
    // does not split blocks, so shrinking a value does not free extra space.
    deleteKeys := keys[UpdateCount : UpdateCount+DeleteCount]
    start = time.Now()
    wg = sync.WaitGroup{}
    chunkSize = DeleteCount / workers
    for w := 0; w < workers; w++ {
        wg.Add(1)
        go func(id int) {
            defer wg.Done()
            base := id * chunkSize
            for i := 0; i < chunkSize; i++ {
                idx := base + i
                key := deleteKeys[idx]
                if err := e.Set(key, "DELETED_MARKER", uint64(TotalKeys+UpdateCount+reuseKeysCount+idx)); err != nil {
                    panic(err)
                }
            }
        }(w)
    }
    wg.Wait()
    duration = time.Since(start)
    qps = float64(DeleteCount) / duration.Seconds()
    printStats("Delete", DeleteCount, duration, qps, getFileSize(DataDir+"/values.data"))

    // 5. Query Performance Test
    fmt.Println("\n--- Phase 5: Query Performance Test ---")
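    // Four workloads follow: point lookups by exact key, a metadata-only
    // wildcard filter, the same filter with LIMIT 10, and a wildcard filter
    // on values, which must read value data from disk.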

    // 5.1 Single-key point lookups.
    start = time.Now()
    qPointCount := 100000
    wg = sync.WaitGroup{}
    chunkSize = qPointCount / workers
    for w := 0; w < workers; w++ {
        wg.Add(1)
        go func(id int) {
            defer wg.Done()
            base := id * chunkSize
            for i := 0; i < chunkSize; i++ {
                // Example query: key = "bench.key.12345"
                idx := base + i
                key := keys[idx]
                query := fmt.Sprintf(`key = "%s"`, key)
                _, _ = e.Query(query)
            }
        }(w)
    }
    wg.Wait()
    duration = time.Since(start)
    qps = float64(qPointCount) / duration.Seconds()
    printStats("Query(Point)", qPointCount, duration, qps, 0)

    // 5.2 Query metadata only (key/commit index) - should be fast, no value IO.
    start = time.Now()
    qMetaCount := 50000
    var metaHits int64
    wg = sync.WaitGroup{}
    chunkSize = qMetaCount / workers
    for w := 0; w < workers; w++ {
        wg.Add(1)
        go func() {
            defer wg.Done()
            // Filter on keys matching "bench.key.999*". The predicate touches only
            // metadata (the key), but the Query implementation scans every key, so
            // each query processes TotalKeys entries - roughly TotalKeys*qMetaCount
            // scans in total. Keep the query count modest accordingly.
            for i := 0; i < chunkSize; i++ {
                res, _ := e.Query(`key like "bench.key.999*"`)
                atomic.AddInt64(&metaHits, int64(len(res)))
            }
        }()
    }
    wg.Wait()
    duration = time.Since(start)
    qps = float64(qMetaCount) / duration.Seconds()
    printStats("Query(Meta)", qMetaCount, duration, qps, 0)

    // 5.3 Query with pagination (LIMIT/OFFSET).
    // Reuse the metadata query with LIMIT 10 to test any pagination speedup.
    start = time.Now()
    qPageCount := 50000
    var pageHits int64
    wg = sync.WaitGroup{}
    chunkSize = qPageCount / workers
    for w := 0; w < workers; w++ {
        wg.Add(1)
        go func() {
            defer wg.Done()
            for i := 0; i < chunkSize; i++ {
                // Note: the engine currently scans fully and then truncates; pushing
                // the limit down would help, but sharding makes that hard.
                res, _ := e.Query(`key like "bench.key.999*" LIMIT 10`)
                atomic.AddInt64(&pageHits, int64(len(res)))
            }
        }()
    }
    wg.Wait()
    duration = time.Since(start)
    qps = float64(qPageCount) / duration.Seconds()
    printStats("Query(Limit10)", qPageCount, duration, qps, 0)

    // 5.4 Query values (requires IO).
    start = time.Now()
    qValCount := 100 // Smaller count because a full value scan is slow
    var valHits int64
    wg = sync.WaitGroup{}
    chunkSize = qValCount / workers
    if chunkSize == 0 {
        chunkSize = 1
        workers = qValCount
    }
    for w := 0; w < workers; w++ {
        wg.Add(1)
        go func() {
            defer wg.Done()
            for i := 0; i < chunkSize; i++ {
                // Example query: value like "*data-999*"
                res, _ := e.Query(`value like "*data-999*"`)
                atomic.AddInt64(&valHits, int64(len(res)))
            }
        }()
    }
    wg.Wait()
    duration = time.Since(start)
    qps = float64(qValCount) / duration.Seconds()
    printStats("Query(Val)", qValCount, duration, qps, 0)

    // 6. Verification
    fmt.Println("\n--- Phase 6: Verification (Full Scan) ---")
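    // The checks below cover: updated keys (long vs. short), simulated deletes,
    // untouched original keys, and the reuse keys inserted in Phase 3.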
    errors := 0
    // 1. Check updated keys (0-9999).
    for i := 0; i < UpdateCount; i++ {
        val, ok := e.Get(keys[i])
        if !ok {
            fmt.Printf("Error: Key %s not found\n", keys[i])
            errors++
            continue
        }
        if i%2 == 0 {
            // Should be the long value (> 40 chars).
            if len(val) < 40 {
                fmt.Printf("Error: Key %s should be long, got: %s\n", keys[i], val)
                errors++
            }
        } else {
            // Should be "short".
            if val != "short" {
                fmt.Printf("Error: Key %s should be 'short', got: %s\n", keys[i], val)
                errors++
            }
        }
    }
    // 2. Check deleted keys (10000-19999).
    for i := UpdateCount; i < UpdateCount+DeleteCount; i++ {
        val, ok := e.Get(keys[i])
        if !ok {
            fmt.Printf("Error: Key %s not found\n", keys[i])
            errors++
            continue
        }
        if val != "DELETED_MARKER" {
            fmt.Printf("Error: Key %s should be deleted, got: %s\n", keys[i], val)
            errors++
        }
    }
    // 3. Check original keys (20000-99999); their values start with "value-data-".
    for i := UpdateCount + DeleteCount; i < TotalKeys; i++ {
        val, ok := e.Get(keys[i])
        if !ok {
            fmt.Printf("Error: Key %s not found\n", keys[i])
            errors++
            continue
        }
        if len(val) < 11 || val[:11] != "value-data-" {
            fmt.Printf("Error: Key %s should be original, got: %s\n", keys[i], val)
            errors++
        }
    }
    // 4. Check the extra reuse keys inserted in Phase 3.
    for i := 0; i < reuseKeysCount; i++ {
        key := fmt.Sprintf("reuse.key.%d", i)
        val, ok := e.Get(key)
        if !ok {
            fmt.Printf("Error: Reuse Key %s not found\n", key)
            errors++
            continue
        }
        if len(val) < 16 || val[:16] != "new-value-reuse-" {
            fmt.Printf("Error: Reuse Key %s mismatch, got: %s\n", key, val)
            errors++
        }
    }
    if errors == 0 {
        fmt.Println("Integrity Check: PASS (All keys verified successfully)")
    } else {
        fmt.Printf("Integrity Check: FAIL (%d errors found)\n", errors)
    }
}
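
// randomString returns a pseudo-random ASCII letter string of length n,
// used to pad benchmark values to the desired size.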
func randomString(n int) string {
    const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    b := make([]byte, n)
    for i := range b {
        b[i] = letters[rand.Intn(len(letters))]
    }
    return string(b)
}
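
// getFileSize returns the size of the file at path in bytes, or 0 if the
// file cannot be stat'ed.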
func getFileSize(path string) int64 {
    fi, err := os.Stat(path)
    if err != nil {
        return 0
    }
    return fi.Size()
}
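
// printStats prints one summary line for a benchmark phase: operation name,
// op count, elapsed time, QPS, and (when size > 0) the data file size in MB.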
func printStats(op string, count int, d time.Duration, qps float64, size int64) {
    sizeStr := ""
    if size > 0 {
        sizeStr = fmt.Sprintf(" | DB Size: %.2f MB", float64(size)/1024/1024)
    }
    fmt.Printf("%s: %d ops in %v | QPS: %.0f%s\n", op, count, d, qps, sizeStr)
}