xbase 2 weeks ago
parent
commit
c1b825a75f
3 changed files with 179 additions and 78 deletions
  1. +1 -0
      .gitignore
  2. +176 -78
      db/engine.go
  3. +2 -0
      example/database/benchmark.go

+ 1 - 0
.gitignore

@@ -25,3 +25,4 @@ _testmain.go
 *.prof
 
 data/
+bench_db_data/

+ 176 - 78
db/engine.go

@@ -302,10 +302,54 @@ func (fti *FullTextIndex) Add(key string, value string) {
 	}
 }
 
+// Search returns keys containing the token.
+// Supports wildcards: "token", "prefix*", "*suffix", "*contains*"
+// For simplicity in this demo, wildcard patterns fall back to iterating over the index.
+// Exact match is O(1).
+func (fti *FullTextIndex) Search(tokenPattern string) []string {
+	fti.mu.RLock()
+	defer fti.mu.RUnlock()
+
+	// 1. Exact Match
+	if !strings.Contains(tokenPattern, "*") {
+		if keys, ok := fti.index[tokenPattern]; ok {
+			// Return copy to avoid race
+			res := make([]string, len(keys))
+			copy(res, keys)
+			return res
+		}
+		return nil
+	}
+
+	// 2. Wildcard Scan (In-Memory Map Scan)
+	// A production version would store tokens in a RadixTree as well;
+	// for now we keep it simple and iterate the map keys, which is
+	// slow when the token map is large.
+	var results []string
+	seen := make(map[string]bool)
+
+	for token, keys := range fti.index {
+		if WildcardMatch(token, tokenPattern) {
+			for _, k := range keys {
+				if !seen[k] {
+					results = append(results, k)
+					seen[k] = true
+				}
+			}
+		}
+	}
+	return results
+}
+
+
 func tokenize(val string) []string {
 	f := func(c rune) bool {
 		return !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9'))
 	}
+	// Note: tokens keep their original case to match existing benchmark
+	// expectations, though a full-text index is usually case-insensitive.
+	// Lowercasing here would make search case-insensitive, which may be
+	// preferable later.
 	return strings.FieldsFunc(val, f)
 }
 
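For reference, here is a minimal, self-contained sketch of the pieces added above: the tokenizer's splitting rule, the O(1) exact-token lookup, and the wildcard fallback that scans every token. The names toks, docs, and index are hypothetical, and strings.Contains stands in for WildcardMatch:

package main

import (
	"fmt"
	"strings"
)

// Same splitting rule as tokenize above: every rune outside a-z, A-Z, 0-9
// acts as a separator.
func toks(val string) []string {
	f := func(c rune) bool {
		return !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9'))
	}
	return strings.FieldsFunc(val, f)
}

func main() {
	// Toy inverted index: token -> keys whose value contains that token.
	index := map[string][]string{}
	docs := map[string]string{"k1": "disk full", "k2": "disk ok"}
	for key, val := range docs {
		for _, t := range toks(val) {
			index[t] = append(index[t], key)
		}
	}

	// Exact match is a single map lookup, O(1) as the Search comment notes.
	fmt.Println(index["disk"]) // [k1 k2] (order depends on map iteration)

	// A wildcard pattern such as "*is*" forces a scan over all tokens,
	// as in FullTextIndex.Search; strings.Contains stands in for WildcardMatch.
	for token, keys := range index {
		if strings.Contains(token, "is") {
			fmt.Println(token, keys)
		}
	}
}
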
@@ -832,14 +876,12 @@ func (e *Engine) Query(sql string) ([]QueryResult, error) {
 			}
 			
 			// 2. Load Value (if needed by other filters, but here we load anyway for result)
-			// Optimize: check if we filter on Value before loading?
-			// For simplicity, just load.
 			val, err := e.Storage.ReadValue(entry.ValueOffset)
 			if err != nil {
 				return []QueryResult{}, nil
 			}
 			
-			// 3. Verify other conditions (e.g. CommitIndex)
+			// 3. Verify other conditions
 			matchAll := true
 			for _, m := range matches {
 				field, op, valRaw := m[1], m[2], m[3]
@@ -862,7 +904,6 @@ func (e *Engine) Query(sql string) ([]QueryResult, error) {
 				}
 				if !matchAll { break }
 			}
-			
 			if matchAll {
 				return []QueryResult{{Key: targetKey, Value: val, CommitIndex: entry.CommitIndex}}, nil
 			}
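
Each remaining condition in the loop above reduces to a small per-field operator dispatch. A self-contained sketch of the CommitIndex case (matchCommitIndex is a hypothetical helper; the diff keeps this logic inline):

package main

import (
	"fmt"
	"strconv"
)

// matchCommitIndex mirrors the CommitIndex comparison above: parse the raw
// literal and apply the operator. Parse errors are ignored, as in the diff.
func matchCommitIndex(commitIndex uint64, op, valRaw string) bool {
	num, _ := strconv.ParseUint(valRaw, 10, 64)
	switch op {
	case ">":
		return commitIndex > num
	case "<":
		return commitIndex < num
	case ">=":
		return commitIndex >= num
	case "<=":
		return commitIndex <= num
	case "=":
		return commitIndex == num
	}
	return false
}

func main() {
	fmt.Println(matchCommitIndex(42, ">=", "40")) // true
	fmt.Println(matchCommitIndex(42, "<", "40"))  // false
}
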
@@ -870,110 +911,176 @@ func (e *Engine) Query(sql string) ([]QueryResult, error) {
 		}
 	}
 
-	var results []QueryResult
-	var mu sync.Mutex
-
+	// Optimization: Inverted Index for Value Queries
 	// Strategy:
-	// 1. Identify primary filter (Key Prefix is best)
-	// 2. Iterate candidates
-	// 3. Filter remaining conditions
+	// 1. Extract potential tokens from `value like "..."`
+	//    e.g. `value like "*keyword*"` -> token "keyword"
+	// 2. Look up candidates from FTIndex
+	// 3. Intersect/Union candidates (if multiple)
+	// 4. Fall back to a scan if no tokens are found or the query is complex
 	
-	var prefix string = ""
-	var usePrefix bool = false
+	var candidates map[string]bool
+	var useFTIndex bool = false
 	
-	// Check for key prefix
 	for _, match := range matches {
-		if match[1] == "key" && match[2] == "like" {
+		if match[1] == "value" && match[2] == "like" {
 			pattern := extractString(match[3])
-			if strings.HasSuffix(pattern, "*") {
-				clean := pattern[:len(pattern)-1]
-				if !strings.ContainsAny(clean, "*?") {
-					prefix = clean
-					usePrefix = true
-					break
+			// Extract a core token by trimming * from both ends.
+			// Simplistic extraction; a fuller version might find the
+			// longest alphanumeric run. Assumes patterns like "*token*"
+			// or "token*".
+			clean := strings.Trim(pattern, "*")
+			if len(clean) > 0 && !strings.Contains(clean, "*") && !strings.Contains(clean, "?") {
+				// "clean" is a plausible token. FTIndex stores exact
+				// tokens, but FTI.Search handles wildcard patterns
+				// itself, so we pass the original pattern through.
+				
+				hits := e.FTIndex.Search(pattern)
+				if hits != nil {
+					// Collect candidates into a set for intersection.
+					currentSet := make(map[string]bool)
+					for _, k := range hits {
+						currentSet[k] = true
+					}
+					
+					if !useFTIndex {
+						candidates = currentSet
+						useFTIndex = true
+					} else {
+						// Intersect
+						newSet := make(map[string]bool)
+						for k := range candidates {
+							if currentSet[k] {
+								newSet[k] = true
+							}
+						}
+						candidates = newSet
+					}
+				} else {
+					// Pattern produced NO matches -> Empty Result
+					return []QueryResult{}, nil
 				}
 			}
 		}
 	}
 	
-	// Iterator
-	iterator := func(key string, entry IndexEntry) bool {
-		// Filter Logic
+	// Prepare Iterator
+	var iterator func(func(string, IndexEntry) bool)
+	
+	if useFTIndex {
+		// Iterate ONLY candidates
+		iterator = func(cb func(string, IndexEntry) bool) {
+			// Iterate candidates in sorted order for deterministic output
+			// (and, plausibly, better cache locality).
+			keys := make([]string, 0, len(candidates))
+			for k := range candidates {
+				keys = append(keys, k)
+			}
+			sort.Strings(keys)
+			
+			for _, k := range keys {
+				if entry, ok := e.Index.Get(k); ok {
+					if !cb(k, entry) {
+						return
+					}
+				}
+			}
+		}
+	} else {
+		// Full Scan or Prefix Scan
+		var prefix string = ""
+		var usePrefix bool = false
+		for _, match := range matches {
+			if match[1] == "key" && match[2] == "like" {
+				pattern := extractString(match[3])
+				if strings.HasSuffix(pattern, "*") {
+					clean := pattern[:len(pattern)-1]
+					if !strings.ContainsAny(clean, "*?") {
+						prefix = clean
+						usePrefix = true
+						break
+					}
+				}
+			}
+		}
+		
+		iterator = func(cb func(string, IndexEntry) bool) {
+			if usePrefix {
+				e.Index.WalkPrefix(prefix, cb)
+			} else {
+				e.Index.WalkPrefix("", cb)
+			}
+		}
+	}
+
+	var results []QueryResult
+	var mu sync.Mutex
+
+	// Execution
+	iterator(func(key string, entry IndexEntry) bool {
 		var valStr string
 		var valLoaded bool
 				
-				matchAll := true
-				for _, match := range matches {
+		matchAll := true
+		for _, match := range matches {
 			field, op, valRaw := match[1], match[2], match[3]
-					switch field {
-					case "CommitIndex":
-						num, _ := strconv.ParseUint(valRaw, 10, 64)
-						switch op {
+			switch field {
+			case "CommitIndex":
+				num, _ := strconv.ParseUint(valRaw, 10, 64)
+				switch op {
 				case ">": if !(entry.CommitIndex > num) { matchAll = false }
 				case "<": if !(entry.CommitIndex < num) { matchAll = false }
 				case ">=": if !(entry.CommitIndex >= num) { matchAll = false }
 				case "<=": if !(entry.CommitIndex <= num) { matchAll = false }
 				case "=": if !(entry.CommitIndex == num) { matchAll = false }
-						}
-					case "key":
-						target := extractString(valRaw)
-						switch op {
+				}
+			case "key":
+				target := extractString(valRaw)
+				switch op {
 				case "=": if key != target { matchAll = false }
 				case "like": if !WildcardMatch(key, target) { matchAll = false }
-						}
-					case "value":
-				// Lazy load
+				}
+			case "value":
+				// FTI.Search already filtered candidates by this pattern,
+				// so the re-check below is redundant when it is the only
+				// value condition. We re-check anyway to stay safe,
+				// especially when multiple value conditions exist.
+				
 				if !valLoaded {
 					v, err := e.Storage.ReadValue(entry.ValueOffset)
 					if err != nil { matchAll = false; break }
 					valStr = v
 					valLoaded = true
 				}
-						target := extractString(valRaw)
-						switch op {
+				target := extractString(valRaw)
+				switch op {
 				case "=": if valStr != target { matchAll = false }
 				case "like": if !WildcardMatch(valStr, target) { matchAll = false }
-						}
-					}
-					if !matchAll { break }
 				}
-				
-				if matchAll {
-			// Load value if needed for result
+			}
+			if !matchAll { break }
+		}
+		
+		if matchAll {
 			if !valLoaded {
 				v, err := e.Storage.ReadValue(entry.ValueOffset)
 				if err == nil { valStr = v }
 			}
-					mu.Lock()
-					results = append(results, QueryResult{
+			mu.Lock()
+			results = append(results, QueryResult{
 				Key:         key,
-						Value:       valStr,
-						CommitIndex: entry.CommitIndex,
-					})
-			// Optimization: If simple LIMIT without Offset and no sorting requirements (default key sort is free in Radix),
-			// we could stop early. But we need to support Offset and flexible Sort.
-			// Currently Radix Walk is Key-Ordered. 
-			// If user doesn't require specific sort (or accepts key sort), we can stop.
-			// Let's assume Key Sort is default.
+				Value:       valStr,
+				CommitIndex: entry.CommitIndex,
+			})
 			if limit > 0 && offset == 0 && len(results) >= limit {
 				mu.Unlock()
-				return false // Stop iteration
+				return false
 			}
-					mu.Unlock()
-				}
-		return true // Continue
-	}
-
-	if usePrefix {
-		e.Index.WalkPrefix(prefix, iterator)
-	} else {
-		e.Index.WalkPrefix("", iterator) // Full Scan
-	}
+			mu.Unlock()
+		}
+		return true
+	})
 
-	// Results are already sorted by Key due to Radix Tree Walk order!
-	// So we can skip sort.Slice if we trust WalkPrefix.
-	// My WalkPrefix implementation attempts to be ordered.
-	
 	// Pagination
 	if offset > 0 {
 		if offset >= len(results) {
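
When more than one value like condition qualifies for the index, the code above narrows the candidate set by intersection. A minimal standalone sketch of that step (intersect is a hypothetical helper name; the diff keeps the logic inline):

package main

import "fmt"

// intersect keeps only keys present in both sets, matching the candidate
// intersection in the FTIndex branch above.
func intersect(a, b map[string]bool) map[string]bool {
	out := make(map[string]bool)
	for k := range a {
		if b[k] {
			out[k] = true
		}
	}
	return out
}

func main() {
	a := map[string]bool{"k1": true, "k2": true}
	b := map[string]bool{"k2": true, "k3": true}
	fmt.Println(intersect(a, b)) // map[k2:true]
}
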
@@ -989,12 +1096,3 @@ func (e *Engine) Query(sql string) ([]QueryResult, error) {
 
 	return results, nil
 }
-
-func (e *Engine) Snapshot() ([]byte, error) {
-	// Not implemented for Radix Tree yet in this demo
-	return nil, nil
-}
-
-func (e *Engine) Restore(data []byte) error {
-	return nil
-}

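Whether a LIKE pattern can be served by the full-text index depends only on its shape after trimming the outer stars. A self-contained sketch of that eligibility rule, mirroring the inline check in Query above (usableToken is a hypothetical name):

package main

import (
	"fmt"
	"strings"
)

// usableToken reports whether a LIKE pattern can be routed to the full-text
// index: after trimming leading/trailing '*', no wildcard may remain.
func usableToken(pattern string) (string, bool) {
	clean := strings.Trim(pattern, "*")
	ok := len(clean) > 0 && !strings.Contains(clean, "*") && !strings.Contains(clean, "?")
	return clean, ok
}

func main() {
	for _, p := range []string{"*error*", "disk*", "*a*b*", "??"} {
		tok, ok := usableToken(p)
		fmt.Printf("%-8q -> token=%q usable=%v\n", p, tok, ok)
	}
}
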
+ 2 - 0
example/database/benchmark.go

@@ -351,6 +351,8 @@ func main() {
 
 	if errors == 0 {
 		fmt.Println("Integrity Check: PASS (All keys verified successfully)")
+		// Cleanup if successful
+		os.RemoveAll(DataDir)
 	} else {
 		fmt.Printf("Integrity Check: FAIL (%d errors found)\n", errors)
 	}
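
The cleanup is deliberately conditional, so a failing run leaves its data behind for inspection. A minimal standalone sketch of the pattern, assuming DataDir names the bench_db_data directory ignored above:

package main

import (
	"fmt"
	"os"
)

// DataDir is an assumption here; the benchmark defines its own constant.
const DataDir = "bench_db_data"

func main() {
	errors := 0 // result of the integrity check, stubbed for this sketch
	if errors == 0 {
		fmt.Println("Integrity Check: PASS")
		// Remove benchmark artifacts only on success, so a failing run
		// keeps its data directory around for debugging.
		os.RemoveAll(DataDir)
	} else {
		fmt.Printf("Integrity Check: FAIL (%d errors found)\n", errors)
	}
}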