crawler: Remove waitForLowActiveIO (#10667)

Only use dynamic delays for the crawler. Even though the max wait was 1 second, the number
of waits could severely impact crawler speed.

Instead of relying on a global metric, we use stateless local delays to keep the crawler
running at a speed better adjusted to current conditions.

The only place we keep waitForLowActiveIO is before bitrot checks, when they are enabled.
This commit is contained in:
Klaus Post 2020-10-13 13:45:08 -07:00 committed by GitHub
parent 9c042a503b
commit 03991c5d41
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 19 additions and 21 deletions

View file

@ -135,12 +135,11 @@ type cachedFolder struct {
}
type folderScanner struct {
root string
getSize getSizeFn
oldCache dataUsageCache
newCache dataUsageCache
withFilter *bloomFilter
waitForLowActiveIO func()
root string
getSize getSizeFn
oldCache dataUsageCache
newCache dataUsageCache
withFilter *bloomFilter
dataUsageCrawlMult float64
dataUsageCrawlDebug bool
@ -155,7 +154,7 @@ type folderScanner struct {
// The returned cache will always be valid, but may not be updated from the existing.
// Before each operation waitForLowActiveIO is called which can be used to temporarily halt the crawler.
// If the supplied context is canceled the function will return at the first chance.
func crawlDataFolder(ctx context.Context, basePath string, cache dataUsageCache, waitForLowActiveIO func(), getSize getSizeFn) (dataUsageCache, error) {
func crawlDataFolder(ctx context.Context, basePath string, cache dataUsageCache, getSize getSizeFn) (dataUsageCache, error) {
t := UTCNow()
logPrefix := color.Green("data-usage: ")
@ -183,7 +182,6 @@ func crawlDataFolder(ctx context.Context, basePath string, cache dataUsageCache,
getSize: getSize,
oldCache: cache,
newCache: dataUsageCache{Info: cache.Info},
waitForLowActiveIO: waitForLowActiveIO,
newFolders: nil,
existingFolders: nil,
dataUsageCrawlMult: delayMult,
@ -376,7 +374,6 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
}
}
}
f.waitForLowActiveIO()
sleepDuration(dataCrawlSleepPerFolder, f.dataUsageCrawlMult)
cache := dataUsageEntry{}
@ -424,7 +421,6 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
}
return nil
}
f.waitForLowActiveIO()
// Dynamic time delay.
t := UTCNow()
@ -484,7 +480,9 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
// If that doesn't bring it back we remove the folder and assume it was deleted.
// This means that the next run will not look for it.
for k := range existing {
f.waitForLowActiveIO()
// Dynamic time delay.
t := UTCNow()
bucket, prefix := path2BucketObject(k)
if f.dataUsageCrawlDebug {
logger.Info(color.Green("folder-scanner:")+" checking disappeared folder: %v/%v", bucket, prefix)
@ -498,6 +496,7 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
versionID: versionID,
}, madmin.HealItemObject)
})
sleepDuration(time.Since(t), f.dataUsageCrawlMult)
if f.dataUsageCrawlDebug && err != nil {
logger.Info(color.Green("healObjects:")+" checking returned value %v", err)
@ -535,7 +534,6 @@ func (f *folderScanner) deepScanFolder(ctx context.Context, folder cachedFolder)
default:
}
f.waitForLowActiveIO()
if typ&os.ModeDir != 0 {
dirStack = append(dirStack, entName)
err := readDirFn(path.Join(dirStack...), addDir)

View file

@ -62,7 +62,7 @@ func TestDataUsageUpdate(t *testing.T) {
return 0, nil
}
got, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, func() {}, getSize)
got, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, getSize)
if err != nil {
t.Fatal(err)
}
@ -183,7 +183,7 @@ func TestDataUsageUpdate(t *testing.T) {
},
}
createUsageTestFiles(t, base, bucket, files)
got, err = crawlDataFolder(context.Background(), base, got, func() {}, getSize)
got, err = crawlDataFolder(context.Background(), base, got, getSize)
if err != nil {
t.Fatal(err)
}
@ -268,7 +268,7 @@ func TestDataUsageUpdate(t *testing.T) {
}
// Changed dir must be picked up in this many cycles.
for i := 0; i < dataUsageUpdateDirCycles; i++ {
got, err = crawlDataFolder(context.Background(), base, got, func() {}, getSize)
got, err = crawlDataFolder(context.Background(), base, got, getSize)
if err != nil {
t.Fatal(err)
}
@ -355,7 +355,7 @@ func TestDataUsageUpdatePrefix(t *testing.T) {
}
return 0, nil
}
got, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: "bucket"}}, func() {}, getSize)
got, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: "bucket"}}, getSize)
if err != nil {
t.Fatal(err)
}
@ -465,7 +465,7 @@ func TestDataUsageUpdatePrefix(t *testing.T) {
},
}
createUsageTestFiles(t, base, "", files)
got, err = crawlDataFolder(context.Background(), base, got, func() {}, getSize)
got, err = crawlDataFolder(context.Background(), base, got, getSize)
if err != nil {
t.Fatal(err)
}
@ -548,7 +548,7 @@ func TestDataUsageUpdatePrefix(t *testing.T) {
}
// Changed dir must be picked up in this many cycles.
for i := 0; i < dataUsageUpdateDirCycles; i++ {
got, err = crawlDataFolder(context.Background(), base, got, func() {}, getSize)
got, err = crawlDataFolder(context.Background(), base, got, getSize)
if err != nil {
t.Fatal(err)
}
@ -652,7 +652,7 @@ func TestDataUsageCacheSerialize(t *testing.T) {
}
return 0, nil
}
want, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, func() {}, getSize)
want, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, getSize)
if err != nil {
t.Fatal(err)
}

View file

@ -327,7 +327,7 @@ func (fs *FSObjects) crawlBucket(ctx context.Context, bucket string, cache dataU
}
// Load bucket info.
cache, err = crawlDataFolder(ctx, fs.fsPath, cache, fs.waitForLowActiveIO, func(item crawlItem) (int64, error) {
cache, err = crawlDataFolder(ctx, fs.fsPath, cache, func(item crawlItem) (int64, error) {
bucket, object := item.bucket, item.objectPath()
fsMetaBytes, err := ioutil.ReadFile(pathJoin(fs.fsPath, minioMetaBucket, bucketMetaPrefix, bucket, object, fs.metaJSONFile))
if err != nil && !os.IsNotExist(err) {

View file

@ -372,7 +372,7 @@ func (s *xlStorage) CrawlAndGetDataUsage(ctx context.Context, cache dataUsageCac
}
opts := globalCrawlerConfig
dataUsageInfo, err := crawlDataFolder(ctx, s.diskPath, cache, s.waitForLowActiveIO, func(item crawlItem) (int64, error) {
dataUsageInfo, err := crawlDataFolder(ctx, s.diskPath, cache, func(item crawlItem) (int64, error) {
// Look for `xl.meta/xl.json' at the leaf.
if !strings.HasSuffix(item.Path, SlashSeparator+xlStorageFormatFile) &&
!strings.HasSuffix(item.Path, SlashSeparator+xlStorageFormatFileV1) {