Add usage cache cleanup and lower forced top compaction (#19719)

Lower forced compaction to 250K entries.

If there are more than 250K entries at the top level, force compact it and log an error.
This commit is contained in:
Klaus Post 2024-05-10 07:49:50 -07:00 committed by GitHub
parent abae30f9e1
commit 9667a170de
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 49 additions and 1 deletions

View file

@ -53,7 +53,7 @@ const (
dataScannerCompactLeastObject = 500 // Compact when there is less than this many objects in a branch.
dataScannerCompactAtChildren = 10000 // Compact when there are this many children in a branch.
dataScannerCompactAtFolders = dataScannerCompactAtChildren / 4 // Compact when this many subfolders in a single folder.
dataScannerForceCompactAtFolders = 1_000_000 // Compact when this many subfolders in a single folder (even top level).
dataScannerForceCompactAtFolders = 250_000 // Compact when this many subfolders in a single folder (even top level).
dataScannerStartDelay = 1 * time.Minute // Time to wait on startup and between cycles.
healDeleteDangling = true
@ -349,6 +349,7 @@ func scanDataFolder(ctx context.Context, disks []StorageAPI, basePath string, ca
// No useful information...
return cache, err
}
s.newCache.forceCompact(dataScannerCompactAtChildren)
s.newCache.Info.LastUpdate = UTCNow()
s.newCache.Info.NextCycle = cache.Info.NextCycle
return s.newCache, nil

View file

@ -729,6 +729,53 @@ func (d *dataUsageCache) reduceChildrenOf(path dataUsageHash, limit int, compact
}
}
// forceCompact will force compact the cache of the top entry.
//
// Nothing is done when d is nil or when the cache holds at most limit entries.
// If the top entry has more than dataScannerForceCompactAtFolders children,
// an error is logged and reduceChildrenOf is called on the top entry with
// limit and its final argument set to true.
//
// If the cache is still above limit afterwards, a cleanup is performed:
// every entry that cannot be reached from the top entry by following
// Children is considered abandoned and is deleted from the cache.
func (d *dataUsageCache) forceCompact(limit int) {
	if d == nil || len(d.Cache) <= limit {
		return
	}
	top := hashPath(d.Info.Name).Key()
	topE := d.find(top)
	if topE == nil {
		scannerLogIf(GlobalContext, errors.New("forceCompact: root not found"))
		return
	}
	// The top level has an excessive number of direct children;
	// log it and request compaction of the top entry.
	if len(topE.Children) > dataScannerForceCompactAtFolders {
		scannerLogOnceIf(GlobalContext, fmt.Errorf("forceCompact: %q has %d children. Force compacting. Expect reduced scanner performance", d.Info.Name, len(topE.Children)), d.Info.Name)
		d.reduceChildrenOf(hashPath(d.Info.Name), limit, true)
	}
	if len(d.Cache) <= limit {
		return
	}

	// Check for abandoned entries.
	found := make(map[string]struct{}, len(d.Cache))

	// mark records all children of entry, recursively.
	// Keys that are already recorded are skipped, so each entry is visited
	// at most once and the walk terminates even if the cache is
	// inconsistent and the child references form a cycle.
	var mark func(entry dataUsageEntry)
	mark = func(entry dataUsageEntry) {
		for k := range entry.Children {
			if _, seen := found[k]; seen {
				continue
			}
			found[k] = struct{}{}
			if ch, ok := d.Cache[k]; ok {
				mark(ch)
			}
		}
	}
	// The top entry is recorded before walking so a reference back to it is
	// not followed again.
	found[top] = struct{}{}
	mark(*topE)

	// Delete all entries not found.
	for k := range d.Cache {
		if _, ok := found[k]; !ok {
			delete(d.Cache, k)
		}
	}
}
// StringAll returns a detailed string representation of all entries in the cache.
func (d *dataUsageCache) StringAll() string {
// Remove bloom filter from print.