@@ -302,10 +302,54 @@ func (fti *FullTextIndex) Add(key string, value string) {
 	}
 }
 
+// Search returns the keys whose values contain a token matching tokenPattern.
+// Supports wildcards: "token", "prefix*", "*suffix", "*contains*".
+// An exact-match lookup is O(1); wildcard patterns fall back to a linear
+// scan over the token map, which is good enough for this demo.
+func (fti *FullTextIndex) Search(tokenPattern string) []string {
+	fti.mu.RLock()
+	defer fti.mu.RUnlock()
+
+	// 1. Exact match
+	if !strings.Contains(tokenPattern, "*") {
+		if keys, ok := fti.index[tokenPattern]; ok {
+			// Return a copy so callers can't race on the shared slice.
+			res := make([]string, len(keys))
+			copy(res, keys)
+			return res
+		}
+		return nil
+	}
+
+	// 2. Wildcard scan over the in-memory token map.
+	// Production would index tokens in a radix tree as well; iterating the
+	// whole map is slow once it grows large, but keeps this demo simple.
+	var results []string
+	seen := make(map[string]bool)
+
+	for token, keys := range fti.index {
+		if WildcardMatch(token, tokenPattern) {
+			for _, k := range keys {
+				if !seen[k] {
+					results = append(results, k)
+					seen[k] = true
+				}
+			}
+		}
+	}
+	return results
+}
+
+
 func tokenize(val string) []string {
 	f := func(c rune) bool {
 		return !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9'))
 	}
+	// Tokens keep their original case, so search is case-sensitive; a
+	// production full-text index would usually lowercase here.
 	return strings.FieldsFunc(val, f)
 }
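A note for reviewers: `WildcardMatch` is called by `Search` above (and by the query filters further down) but is not part of this diff. For reference, a minimal sketch of a glob matcher with the semantics these call sites assume, where `*` matches any run of characters and `?` exactly one (the `ContainsAny(clean, "*?")` check below suggests both are supported). It compares bytes, which is fine for the ASCII tokens `tokenize` emits; the repo's actual implementation may differ:

```go
// WildcardMatch reports whether s matches pattern, where '*' matches any
// (possibly empty) run of characters and '?' matches exactly one.
// Classic greedy two-pointer glob matching, backtracking on the last '*'.
func WildcardMatch(s, pattern string) bool {
	si, pi := 0, 0      // cursors into s and pattern
	star, mark := -1, 0 // last '*' position in pattern, and its match end in s
	for si < len(s) {
		switch {
		case pi < len(pattern) && (pattern[pi] == '?' || pattern[pi] == s[si]):
			si++
			pi++
		case pi < len(pattern) && pattern[pi] == '*':
			star, mark = pi, si
			pi++
		case star >= 0:
			// Mismatch after a '*': let the '*' absorb one more character.
			pi = star + 1
			mark++
			si = mark
		default:
			return false
		}
	}
	// Trailing '*'s match the empty string.
	for pi < len(pattern) && pattern[pi] == '*' {
		pi++
	}
	return pi == len(pattern)
}
```

With these semantics, `Search("alp*")` returns the keys of every value containing a token that starts with "alp".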
@@ -832,14 +876,12 @@ func (e *Engine) Query(sql string) ([]QueryResult, error) {
 		}
 
 		// 2. Load Value (if needed by other filters, but here we load anyway for result)
-		// Optimize: check if we filter on Value before loading?
-		// For simplicity, just load.
 		val, err := e.Storage.ReadValue(entry.ValueOffset)
 		if err != nil {
 			return []QueryResult{}, nil
 		}
 
-		// 3. Verify other conditions (e.g. CommitIndex)
+		// 3. Verify other conditions
 		matchAll := true
 		for _, m := range matches {
 			field, op, valRaw := m[1], m[2], m[3]
@@ -862,7 +904,6 @@ func (e *Engine) Query(sql string) ([]QueryResult, error) {
 			}
 			if !matchAll { break }
 		}
-
 		if matchAll {
 			return []QueryResult{{Key: targetKey, Value: val, CommitIndex: entry.CommitIndex}}, nil
 		}
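For orientation: this diff never shows how `matches` is produced. The `m[1], m[2], m[3]` accesses imply a capture-group regex over `field op value` conditions, roughly of the following shape. This is purely an assumption for readability, not the engine's actual parser:

```go
package parser // hypothetical

import "regexp"

// condRe is a guess at the condition regex Query consumes:
// capture 1 = field, 2 = operator, 3 = raw value.
var condRe = regexp.MustCompile(`(\w+)\s*(>=|<=|=|>|<|like)\s*("[^"]*"|\S+)`)

// Usage: matches := condRe.FindAllStringSubmatch(whereClause, -1)
// then, per condition: field, op, valRaw := m[1], m[2], m[3]
```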
@@ -870,110 +911,176 @@ func (e *Engine) Query(sql string) ([]QueryResult, error) {
 		}
 	}
 
-	var results []QueryResult
-	var mu sync.Mutex
-
+	// Optimization: answer value queries from the inverted index.
 	// Strategy:
-	// 1. Identify primary filter (Key Prefix is best)
-	// 2. Iterate candidates
-	// 3. Filter remaining conditions
+	// 1. Extract potential tokens from `value like "..."` filters,
+	//    e.g. `value like "*keyword*"` -> token "keyword".
+	// 2. Look up candidate keys in the FTIndex.
+	// 3. Intersect the candidate sets when there are multiple filters.
+	// 4. Fall back to a scan when no usable token is found.
 
-	var prefix string = ""
-	var usePrefix bool = false
+	var candidates map[string]bool
+	useFTIndex := false
 
-	// Check for key prefix
 	for _, match := range matches {
-		if match[1] == "key" && match[2] == "like" {
+		if match[1] == "value" && match[2] == "like" {
 			pattern := extractString(match[3])
-			if strings.HasSuffix(pattern, "*") {
-				clean := pattern[:len(pattern)-1]
-				if !strings.ContainsAny(clean, "*?") {
-					prefix = clean
-					usePrefix = true
-					break
+			// Strip the surrounding wildcards to get the core token.
+			clean := strings.Trim(pattern, "*")
+			// The index path is only sound when the remainder is a single
+			// alphanumeric token: tokenize splits values on everything else,
+			// so a pattern like "*foo bar*" has no matching token and must
+			// fall back to a scan instead of returning an empty result.
+			isToken := len(clean) > 0
+			for _, c := range clean {
+				if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')) {
+					isToken = false
+					break
+				}
+			}
+			if isToken {
+				// FTIndex stores exact tokens, but FTI.Search handles
+				// wildcard patterns itself, so pass the original pattern.
+				hits := e.FTIndex.Search(pattern)
+				if hits != nil {
+					currentSet := make(map[string]bool)
+					for _, k := range hits {
+						currentSet[k] = true
+					}
+
+					if !useFTIndex {
+						candidates = currentSet
+						useFTIndex = true
+					} else {
+						// Intersect with the candidates collected so far.
+						newSet := make(map[string]bool)
+						for k := range candidates {
+							if currentSet[k] {
+								newSet[k] = true
+							}
+						}
+						candidates = newSet
+					}
+				} else {
+					// The pattern matches no token at all -> empty result.
+					return []QueryResult{}, nil
 				}
 			}
 		}
 	}
 
-	// Iterator
-	iterator := func(key string, entry IndexEntry) bool {
-		// Filter Logic
+	// Prepare the iterator.
+	var iterator func(func(string, IndexEntry) bool)
+
+	if useFTIndex {
+		// Iterate ONLY the candidate keys, sorted so the output stays
+		// deterministic and key-ordered like a radix walk.
+		iterator = func(cb func(string, IndexEntry) bool) {
+			keys := make([]string, 0, len(candidates))
+			for k := range candidates {
+				keys = append(keys, k)
+			}
+			sort.Strings(keys)
+
+			for _, k := range keys {
+				if entry, ok := e.Index.Get(k); ok {
+					if !cb(k, entry) {
+						return
+					}
+				}
+			}
+		}
+	} else {
+		// Full scan, or a prefix scan if a usable key filter exists.
+		var prefix string
+		var usePrefix bool
+		for _, match := range matches {
+			if match[1] == "key" && match[2] == "like" {
+				pattern := extractString(match[3])
+				if strings.HasSuffix(pattern, "*") {
+					clean := pattern[:len(pattern)-1]
+					if !strings.ContainsAny(clean, "*?") {
+						prefix = clean
+						usePrefix = true
+						break
+					}
+				}
+			}
+		}
+
+		iterator = func(cb func(string, IndexEntry) bool) {
+			if usePrefix {
+				e.Index.WalkPrefix(prefix, cb)
+			} else {
+				e.Index.WalkPrefix("", cb)
+			}
+		}
+	}
+
+	var results []QueryResult
+	var mu sync.Mutex
+
+	// Execution
+	iterator(func(key string, entry IndexEntry) bool {
 		var valStr string
 		var valLoaded bool
 
 		matchAll := true
 		for _, match := range matches {
 			field, op, valRaw := match[1], match[2], match[3]
 			switch field {
 			case "CommitIndex":
 				num, _ := strconv.ParseUint(valRaw, 10, 64)
 				switch op {
 				case ">": if !(entry.CommitIndex > num) { matchAll = false }
 				case "<": if !(entry.CommitIndex < num) { matchAll = false }
 				case ">=": if !(entry.CommitIndex >= num) { matchAll = false }
 				case "<=": if !(entry.CommitIndex <= num) { matchAll = false }
 				case "=": if !(entry.CommitIndex == num) { matchAll = false }
 				}
 			case "key":
 				target := extractString(valRaw)
 				switch op {
 				case "=": if key != target { matchAll = false }
 				case "like": if !WildcardMatch(key, target) { matchAll = false }
 				}
 			case "value":
-				// Lazy load
+				// The FTI path already filtered on the pattern, but it works
+				// per token while this filter applies to the whole value, so
+				// re-check here (other value conditions may exist as well).
 				if !valLoaded {
 					v, err := e.Storage.ReadValue(entry.ValueOffset)
 					if err != nil { matchAll = false; break }
 					valStr = v
 					valLoaded = true
 				}
 				target := extractString(valRaw)
 				switch op {
 				case "=": if valStr != target { matchAll = false }
 				case "like": if !WildcardMatch(valStr, target) { matchAll = false }
 				}
 			}
 			if !matchAll { break }
 		}
 
 		if matchAll {
-			// Load value if needed for result
 			if !valLoaded {
 				v, err := e.Storage.ReadValue(entry.ValueOffset)
 				if err == nil { valStr = v }
 			}
 			mu.Lock()
 			results = append(results, QueryResult{
 				Key: key,
 				Value: valStr,
 				CommitIndex: entry.CommitIndex,
 			})
-			// Optimization: If simple LIMIT without Offset and no sorting requirements (default key sort is free in Radix),
-			// we could stop early. But we need to support Offset and flexible Sort.
-			// Currently Radix Walk is Key-Ordered.
-			// If user doesn't require specific sort (or accepts key sort), we can stop.
-			// Let's assume Key Sort is default.
+			// The walk is key-ordered, so with a LIMIT and no OFFSET we can
+			// stop as soon as enough results have been collected.
 			if limit > 0 && offset == 0 && len(results) >= limit {
 				mu.Unlock()
-				return false // Stop iteration
+				return false
 			}
 			mu.Unlock()
 		}
-		return true // Continue
-	}
-
-	if usePrefix {
-		e.Index.WalkPrefix(prefix, iterator)
-	} else {
-		e.Index.WalkPrefix("", iterator) // Full Scan
-	}
+		return true
+	})
 
-	// Results are already sorted by Key due to Radix Tree Walk order!
-	// So we can skip sort.Slice if we trust WalkPrefix.
-	// My WalkPrefix implementation attempts to be ordered.
-
 	// Pagination
 	if offset > 0 {
 		if offset >= len(results) {
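The intersection step above is what lets several `value like` filters combine: a key survives only if every per-token lookup returned it. A self-contained illustration of that set logic (the `intersect` helper is ours, not the engine's):

```go
package main

import (
	"fmt"
	"sort"
)

// intersect mirrors the planner's candidate-set intersection: only keys
// present in both sets survive.
func intersect(a, b map[string]bool) map[string]bool {
	out := make(map[string]bool)
	for k := range a {
		if b[k] {
			out[k] = true
		}
	}
	return out
}

func main() {
	// Candidate keys from two hypothetical filters:
	//   value like "*alpha*"  and  value like "*beta*"
	alpha := map[string]bool{"k1": true, "k2": true, "k3": true}
	beta := map[string]bool{"k2": true, "k3": true, "k4": true}

	var keys []string
	for k := range intersect(alpha, beta) {
		keys = append(keys, k)
	}
	sort.Strings(keys)
	fmt.Println(keys) // [k2 k3]
}
```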
@@ -989,12 +1096,3 @@ func (e *Engine) Query(sql string) ([]QueryResult, error) {
 
 	return results, nil
 }
-
-func (e *Engine) Snapshot() ([]byte, error) {
-	// Not implemented for Radix Tree yet in this demo
-	return nil, nil
-}
-
-func (e *Engine) Restore(data []byte) error {
-	return nil
-}
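The pagination body is cut off by the hunk context above; for completeness, the usual shape of that step under this code's `offset`/`limit` conventions, shown only as an assumption since the diff doesn't include it:

```go
// Hypothetical sketch; the actual pagination lines are outside the hunk.
if offset > 0 {
	if offset >= len(results) {
		return []QueryResult{}, nil // offset past the end -> empty page
	}
	results = results[offset:]
}
if limit > 0 && len(results) > limit {
	results = results[:limit]
}
return results, nil
```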