|
@@ -86,7 +86,10 @@ func (fi *FlatIndex) compare(idx int, target string) int {
|
|
|
func (fi *FlatIndex) Insert(key string, entry IndexEntry) {
|
|
func (fi *FlatIndex) Insert(key string, entry IndexEntry) {
|
|
|
fi.mu.Lock()
|
|
fi.mu.Lock()
|
|
|
defer fi.mu.Unlock()
|
|
defer fi.mu.Unlock()
|
|
|
|
|
+ fi.insertLocked(key, entry)
|
|
|
|
|
+}
|
|
|
|
|
|
|
|
|
|
+func (fi *FlatIndex) insertLocked(key string, entry IndexEntry) {
|
|
|
// 1. Binary Search
|
|
// 1. Binary Search
|
|
|
idx := sort.Search(len(fi.items), func(i int) bool {
|
|
idx := sort.Search(len(fi.items), func(i int) bool {
|
|
|
return fi.getKey(i) >= key
|
|
return fi.getKey(i) >= key
|
|
@@ -586,6 +589,10 @@ func NewEngine(dataDir string, opts ...EngineOption) (*Engine, error) {
|
|
|
// Rebuild Index from Disk
|
|
// Rebuild Index from Disk
|
|
|
// Note: We still scan to rebuild the memory index, but LastCommitIndex is initialized from meta file
|
|
// Note: We still scan to rebuild the memory index, but LastCommitIndex is initialized from meta file
|
|
|
// We can update LastCommitIndex if the log is ahead of the meta file (e.g. crash before meta update)
|
|
// We can update LastCommitIndex if the log is ahead of the meta file (e.g. crash before meta update)
|
|
|
|
|
+ // We also correct LastCommitIndex if meta file is ahead of data (e.g. data flush lost during power failure)
|
|
|
|
|
+
|
|
|
|
|
+ realMaxIndex := uint64(0)
|
|
|
|
|
+
|
|
|
_, err = store.Scan(func(rec Record) {
|
|
_, err = store.Scan(func(rec Record) {
|
|
|
if rec.Type == RecordTypePut {
|
|
if rec.Type == RecordTypePut {
|
|
|
e.Index.Insert(rec.Key, IndexEntry{
|
|
e.Index.Insert(rec.Key, IndexEntry{
|
|
@@ -595,23 +602,34 @@ func NewEngine(dataDir string, opts ...EngineOption) (*Engine, error) {
|
|
|
if e.FTIndex != nil {
|
|
if e.FTIndex != nil {
|
|
|
e.FTIndex.Add(rec.Key, rec.Value)
|
|
e.FTIndex.Add(rec.Key, rec.Value)
|
|
|
}
|
|
}
|
|
|
-
|
|
|
|
|
- // Update LastCommitIndex if log is ahead
|
|
|
|
|
- if rec.CommitIndex > e.LastCommitIndex {
|
|
|
|
|
- e.LastCommitIndex = rec.CommitIndex
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
} else if rec.Type == RecordTypeDelete {
|
|
} else if rec.Type == RecordTypeDelete {
|
|
|
// Cleanup FTIndex using old value if possible
|
|
// Cleanup FTIndex using old value if possible
|
|
|
e.removeValueFromFTIndex(rec.Key)
|
|
e.removeValueFromFTIndex(rec.Key)
|
|
|
e.Index.Delete(rec.Key)
|
|
e.Index.Delete(rec.Key)
|
|
|
-
|
|
|
|
|
- if rec.CommitIndex > e.LastCommitIndex {
|
|
|
|
|
- e.LastCommitIndex = rec.CommitIndex
|
|
|
|
|
- }
|
|
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // Track the actual max index present in the data file
|
|
|
|
|
+ if rec.CommitIndex > realMaxIndex {
|
|
|
|
|
+ realMaxIndex = rec.CommitIndex
|
|
|
}
|
|
}
|
|
|
})
|
|
})
|
|
|
|
|
|
|
|
|
|
+ // Critical Safety Check:
|
|
|
|
|
+ // 1. If Log > Meta: Update Meta (Normal crash recovery)
|
|
|
|
|
+ // 2. If Meta > Log: Downgrade Meta (Data loss recovery - force Raft replay)
|
|
|
|
|
+ if realMaxIndex > e.LastCommitIndex {
|
|
|
|
|
+ e.LastCommitIndex = realMaxIndex
|
|
|
|
|
+ } else if realMaxIndex < e.LastCommitIndex {
|
|
|
|
|
+ // Detect inconsistency: Meta says we have more data than what's on disk.
|
|
|
|
|
+ // This happens if Meta was flushed but Data tail was lost during power failure.
|
|
|
|
|
+ // We MUST trust the actual data on disk.
|
|
|
|
|
+ fmt.Printf("WARNING: Inconsistency detected! Meta LastCommitIndex (%d) > Data RealIndex (%d). Resetting to %d to force Raft replay.\n",
|
|
|
|
|
+ e.LastCommitIndex, realMaxIndex, realMaxIndex)
|
|
|
|
|
+ e.LastCommitIndex = realMaxIndex
|
|
|
|
|
+ // Force save corrected metadata
|
|
|
|
|
+ e.saveMetadata()
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
return e, nil
|
|
return e, nil
|
|
|
}
|
|
}
|
|
|
|
|
|
|
@@ -1326,9 +1344,13 @@ func (e *Engine) Restore(data []byte) error {
|
|
|
// 3. Truncate & Reset
|
|
// 3. Truncate & Reset
|
|
|
// We are replacing the entire DB state.
|
|
// We are replacing the entire DB state.
|
|
|
|
|
|
|
|
- // Reset In-Memory Index
|
|
|
|
|
- e.Index = NewFlatIndex()
|
|
|
|
|
|
|
+ // Reset In-Memory Index by clearing slices (keeping underlying capacity)
|
|
|
|
|
+ // We do NOT replace the pointer, so we keep the lock valid.
|
|
|
|
|
+ e.Index.keyBuf = e.Index.keyBuf[:0]
|
|
|
|
|
+ e.Index.items = e.Index.items[:0]
|
|
|
|
|
+
|
|
|
if e.config.EnableValueIndex {
|
|
if e.config.EnableValueIndex {
|
|
|
|
|
+ // Recreate FTIndex since it's a map
|
|
|
e.FTIndex = NewFullTextIndex()
|
|
e.FTIndex = NewFullTextIndex()
|
|
|
}
|
|
}
|
|
|
|
|
|
|
@@ -1352,12 +1374,15 @@ func (e *Engine) Restore(data []byte) error {
|
|
|
|
|
|
|
|
// 4. Rebuild Data from Snapshot Stream
|
|
// 4. Rebuild Data from Snapshot Stream
|
|
|
for i := uint64(0); i < count; i++ {
|
|
for i := uint64(0); i < count; i++ {
|
|
|
- // Read KeyLen
|
|
|
|
|
- if offset+2 > len(data) {
|
|
|
|
|
- return fmt.Errorf("restore failed: truncated data at record %d (keylen)", i)
|
|
|
|
|
|
|
+ // Read Header (14 bytes)
|
|
|
|
|
+ if offset+14 > len(data) {
|
|
|
|
|
+ return fmt.Errorf("restore failed: truncated header at record %d", i)
|
|
|
}
|
|
}
|
|
|
- keyLen := int(binary.LittleEndian.Uint16(data[offset:]))
|
|
|
|
|
- offset += 2
|
|
|
|
|
|
|
+
|
|
|
|
|
+ keyLen := int(binary.LittleEndian.Uint16(data[offset : offset+2]))
|
|
|
|
|
+ valLen := int(binary.LittleEndian.Uint32(data[offset+2 : offset+6]))
|
|
|
|
|
+ commitIndex := binary.LittleEndian.Uint64(data[offset+6 : offset+14])
|
|
|
|
|
+ offset += 14
|
|
|
|
|
|
|
|
// Read Key
|
|
// Read Key
|
|
|
if offset+keyLen > len(data) {
|
|
if offset+keyLen > len(data) {
|
|
@@ -1366,13 +1391,6 @@ func (e *Engine) Restore(data []byte) error {
|
|
|
key := string(data[offset : offset+keyLen])
|
|
key := string(data[offset : offset+keyLen])
|
|
|
offset += keyLen
|
|
offset += keyLen
|
|
|
|
|
|
|
|
- // Read ValLen
|
|
|
|
|
- if offset+4 > len(data) {
|
|
|
|
|
- return fmt.Errorf("restore failed: truncated data at record %d (vallen)", i)
|
|
|
|
|
- }
|
|
|
|
|
- valLen := int(binary.LittleEndian.Uint32(data[offset:]))
|
|
|
|
|
- offset += 4
|
|
|
|
|
-
|
|
|
|
|
// Read Value
|
|
// Read Value
|
|
|
if offset+valLen > len(data) {
|
|
if offset+valLen > len(data) {
|
|
|
return fmt.Errorf("restore failed: truncated data at record %d (value)", i)
|
|
return fmt.Errorf("restore failed: truncated data at record %d (value)", i)
|
|
@@ -1380,13 +1398,6 @@ func (e *Engine) Restore(data []byte) error {
|
|
|
val := string(data[offset : offset+valLen])
|
|
val := string(data[offset : offset+valLen])
|
|
|
offset += valLen
|
|
offset += valLen
|
|
|
|
|
|
|
|
- // Read CommitIndex
|
|
|
|
|
- if offset+8 > len(data) {
|
|
|
|
|
- return fmt.Errorf("restore failed: truncated data at record %d (commitIndex)", i)
|
|
|
|
|
- }
|
|
|
|
|
- commitIndex := binary.LittleEndian.Uint64(data[offset:])
|
|
|
|
|
- offset += 8
|
|
|
|
|
-
|
|
|
|
|
if commitIndex > maxCommitIndex {
|
|
if commitIndex > maxCommitIndex {
|
|
|
maxCommitIndex = commitIndex
|
|
maxCommitIndex = commitIndex
|
|
|
}
|
|
}
|
|
@@ -1402,7 +1413,8 @@ func (e *Engine) Restore(data []byte) error {
|
|
|
// Update Memory Index
|
|
// Update Memory Index
|
|
|
// Accessing e.Index.items directly because we hold e.Index.mu
|
|
// Accessing e.Index.items directly because we hold e.Index.mu
|
|
|
// But we should use the helper to safely manage keyBuf
|
|
// But we should use the helper to safely manage keyBuf
|
|
|
- e.Index.Insert(key, IndexEntry{
|
|
|
|
|
|
|
+ // Use insertLocked to avoid deadlock (we already hold e.Index.mu)
|
|
|
|
|
+ e.Index.insertLocked(key, IndexEntry{
|
|
|
ValueOffset: writeOffset,
|
|
ValueOffset: writeOffset,
|
|
|
CommitIndex: commitIndex,
|
|
CommitIndex: commitIndex,
|
|
|
})
|
|
})
|