mirror of
https://github.com/minio/minio
synced 2024-11-05 17:34:01 +00:00
crawler: Remove waitForLowActiveIO (#10667)
Only use dynamic delays for the crawler. Even though the max wait was 1 second the number of waits could severely impact crawler speed. Instead of relying on a global metric, we use the stateless local delays to keep the crawler running at a speed more adjusted to current conditions. The only case we keep it is before bitrot checks when enabled.
This commit is contained in:
parent
9c042a503b
commit
03991c5d41
4 changed files with 19 additions and 21 deletions
|
@ -135,12 +135,11 @@ type cachedFolder struct {
|
|||
}
|
||||
|
||||
type folderScanner struct {
|
||||
root string
|
||||
getSize getSizeFn
|
||||
oldCache dataUsageCache
|
||||
newCache dataUsageCache
|
||||
withFilter *bloomFilter
|
||||
waitForLowActiveIO func()
|
||||
root string
|
||||
getSize getSizeFn
|
||||
oldCache dataUsageCache
|
||||
newCache dataUsageCache
|
||||
withFilter *bloomFilter
|
||||
|
||||
dataUsageCrawlMult float64
|
||||
dataUsageCrawlDebug bool
|
||||
|
@ -155,7 +154,7 @@ type folderScanner struct {
|
|||
// The returned cache will always be valid, but may not be updated from the existing.
|
||||
// Before each operation waitForLowActiveIO is called which can be used to temporarily halt the crawler.
|
||||
// If the supplied context is canceled the function will return at the first chance.
|
||||
func crawlDataFolder(ctx context.Context, basePath string, cache dataUsageCache, waitForLowActiveIO func(), getSize getSizeFn) (dataUsageCache, error) {
|
||||
func crawlDataFolder(ctx context.Context, basePath string, cache dataUsageCache, getSize getSizeFn) (dataUsageCache, error) {
|
||||
t := UTCNow()
|
||||
|
||||
logPrefix := color.Green("data-usage: ")
|
||||
|
@ -183,7 +182,6 @@ func crawlDataFolder(ctx context.Context, basePath string, cache dataUsageCache,
|
|||
getSize: getSize,
|
||||
oldCache: cache,
|
||||
newCache: dataUsageCache{Info: cache.Info},
|
||||
waitForLowActiveIO: waitForLowActiveIO,
|
||||
newFolders: nil,
|
||||
existingFolders: nil,
|
||||
dataUsageCrawlMult: delayMult,
|
||||
|
@ -376,7 +374,6 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
|
|||
}
|
||||
}
|
||||
}
|
||||
f.waitForLowActiveIO()
|
||||
sleepDuration(dataCrawlSleepPerFolder, f.dataUsageCrawlMult)
|
||||
|
||||
cache := dataUsageEntry{}
|
||||
|
@ -424,7 +421,6 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
|
|||
}
|
||||
return nil
|
||||
}
|
||||
f.waitForLowActiveIO()
|
||||
// Dynamic time delay.
|
||||
t := UTCNow()
|
||||
|
||||
|
@ -484,7 +480,9 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
|
|||
// If that doesn't bring it back we remove the folder and assume it was deleted.
|
||||
// This means that the next run will not look for it.
|
||||
for k := range existing {
|
||||
f.waitForLowActiveIO()
|
||||
// Dynamic time delay.
|
||||
t := UTCNow()
|
||||
|
||||
bucket, prefix := path2BucketObject(k)
|
||||
if f.dataUsageCrawlDebug {
|
||||
logger.Info(color.Green("folder-scanner:")+" checking disappeared folder: %v/%v", bucket, prefix)
|
||||
|
@ -498,6 +496,7 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
|
|||
versionID: versionID,
|
||||
}, madmin.HealItemObject)
|
||||
})
|
||||
sleepDuration(time.Since(t), f.dataUsageCrawlMult)
|
||||
|
||||
if f.dataUsageCrawlDebug && err != nil {
|
||||
logger.Info(color.Green("healObjects:")+" checking returned value %v", err)
|
||||
|
@ -535,7 +534,6 @@ func (f *folderScanner) deepScanFolder(ctx context.Context, folder cachedFolder)
|
|||
default:
|
||||
}
|
||||
|
||||
f.waitForLowActiveIO()
|
||||
if typ&os.ModeDir != 0 {
|
||||
dirStack = append(dirStack, entName)
|
||||
err := readDirFn(path.Join(dirStack...), addDir)
|
||||
|
|
|
@ -62,7 +62,7 @@ func TestDataUsageUpdate(t *testing.T) {
|
|||
return 0, nil
|
||||
}
|
||||
|
||||
got, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, func() {}, getSize)
|
||||
got, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, getSize)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -183,7 +183,7 @@ func TestDataUsageUpdate(t *testing.T) {
|
|||
},
|
||||
}
|
||||
createUsageTestFiles(t, base, bucket, files)
|
||||
got, err = crawlDataFolder(context.Background(), base, got, func() {}, getSize)
|
||||
got, err = crawlDataFolder(context.Background(), base, got, getSize)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -268,7 +268,7 @@ func TestDataUsageUpdate(t *testing.T) {
|
|||
}
|
||||
// Changed dir must be picked up in this many cycles.
|
||||
for i := 0; i < dataUsageUpdateDirCycles; i++ {
|
||||
got, err = crawlDataFolder(context.Background(), base, got, func() {}, getSize)
|
||||
got, err = crawlDataFolder(context.Background(), base, got, getSize)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -355,7 +355,7 @@ func TestDataUsageUpdatePrefix(t *testing.T) {
|
|||
}
|
||||
return 0, nil
|
||||
}
|
||||
got, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: "bucket"}}, func() {}, getSize)
|
||||
got, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: "bucket"}}, getSize)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -465,7 +465,7 @@ func TestDataUsageUpdatePrefix(t *testing.T) {
|
|||
},
|
||||
}
|
||||
createUsageTestFiles(t, base, "", files)
|
||||
got, err = crawlDataFolder(context.Background(), base, got, func() {}, getSize)
|
||||
got, err = crawlDataFolder(context.Background(), base, got, getSize)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -548,7 +548,7 @@ func TestDataUsageUpdatePrefix(t *testing.T) {
|
|||
}
|
||||
// Changed dir must be picked up in this many cycles.
|
||||
for i := 0; i < dataUsageUpdateDirCycles; i++ {
|
||||
got, err = crawlDataFolder(context.Background(), base, got, func() {}, getSize)
|
||||
got, err = crawlDataFolder(context.Background(), base, got, getSize)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -652,7 +652,7 @@ func TestDataUsageCacheSerialize(t *testing.T) {
|
|||
}
|
||||
return 0, nil
|
||||
}
|
||||
want, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, func() {}, getSize)
|
||||
want, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, getSize)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
|
|
@ -327,7 +327,7 @@ func (fs *FSObjects) crawlBucket(ctx context.Context, bucket string, cache dataU
|
|||
}
|
||||
|
||||
// Load bucket info.
|
||||
cache, err = crawlDataFolder(ctx, fs.fsPath, cache, fs.waitForLowActiveIO, func(item crawlItem) (int64, error) {
|
||||
cache, err = crawlDataFolder(ctx, fs.fsPath, cache, func(item crawlItem) (int64, error) {
|
||||
bucket, object := item.bucket, item.objectPath()
|
||||
fsMetaBytes, err := ioutil.ReadFile(pathJoin(fs.fsPath, minioMetaBucket, bucketMetaPrefix, bucket, object, fs.metaJSONFile))
|
||||
if err != nil && !os.IsNotExist(err) {
|
||||
|
|
|
@ -372,7 +372,7 @@ func (s *xlStorage) CrawlAndGetDataUsage(ctx context.Context, cache dataUsageCac
|
|||
}
|
||||
opts := globalCrawlerConfig
|
||||
|
||||
dataUsageInfo, err := crawlDataFolder(ctx, s.diskPath, cache, s.waitForLowActiveIO, func(item crawlItem) (int64, error) {
|
||||
dataUsageInfo, err := crawlDataFolder(ctx, s.diskPath, cache, func(item crawlItem) (int64, error) {
|
||||
// Look for `xl.meta/xl.json' at the leaf.
|
||||
if !strings.HasSuffix(item.Path, SlashSeparator+xlStorageFormatFile) &&
|
||||
!strings.HasSuffix(item.Path, SlashSeparator+xlStorageFormatFileV1) {
|
||||
|
|
Loading…
Reference in a new issue