diff --git a/cmd/background-newdisks-heal-ops.go b/cmd/background-newdisks-heal-ops.go index 16a345dfe..22bf7b81a 100644 --- a/cmd/background-newdisks-heal-ops.go +++ b/cmd/background-newdisks-heal-ops.go @@ -289,20 +289,20 @@ func initAutoHeal(ctx context.Context, objAPI ObjectLayer) { } func getLocalDisksToHeal() (disksToHeal Endpoints) { - for _, ep := range globalEndpoints { - for _, endpoint := range ep.Endpoints { - if !endpoint.IsLocal { - continue - } - // Try to connect to the current endpoint - // and reformat if the current disk is not formatted - disk, _, err := connectEndpoint(endpoint) - if errors.Is(err, errUnformattedDisk) { - disksToHeal = append(disksToHeal, endpoint) - } else if err == nil && disk != nil && disk.Healing() != nil { - disksToHeal = append(disksToHeal, disk.Endpoint()) - } + for _, disk := range globalLocalDrives { + _, err := disk.GetDiskID() + if errors.Is(err, errUnformattedDisk) { + disksToHeal = append(disksToHeal, disk.Endpoint()) + continue } + if disk.Healing() != nil { + disksToHeal = append(disksToHeal, disk.Endpoint()) + } + } + if len(disksToHeal) == globalEndpoints.NEndpoints() { + // When all disks == all command line endpoints + // this is a fresh setup, no need to trigger healing. + return Endpoints{} } return disksToHeal } diff --git a/cmd/config.go b/cmd/config.go index 2b15eb518..a7975f9b7 100644 --- a/cmd/config.go +++ b/cmd/config.go @@ -35,8 +35,8 @@ import ( const ( minioConfigPrefix = "config" - - kvPrefix = ".kv" + minioConfigBucket = minioMetaBucket + SlashSeparator + minioConfigPrefix + kvPrefix = ".kv" // Captures all the previous SetKV operations and allows rollback. minioConfigHistoryPrefix = minioConfigPrefix + "/history" diff --git a/cmd/data-update-tracker.go b/cmd/data-update-tracker.go index a5ad32d9c..0ca5f1bf2 100644 --- a/cmd/data-update-tracker.go +++ b/cmd/data-update-tracker.go @@ -204,14 +204,14 @@ func (d *dataUpdateTracker) latestWithDir(dir string) uint64 { // All of these will exit when the context is canceled. func (d *dataUpdateTracker) start(ctx context.Context, drives ...string) { if len(drives) == 0 { - logger.LogIf(ctx, errors.New("dataUpdateTracker.start: No drives specified")) + logger.LogIf(ctx, errors.New("dataUpdateTracker.start: No local drives specified")) return } d.load(ctx, drives...) go d.startCollector(ctx) // startSaver will unlock. d.mu.Lock() - go d.startSaver(ctx, dataUpdateTrackerSaveInterval, drives) + go d.startSaver(ctx, dataUpdateTrackerSaveInterval, drives...) } // load will attempt to load data tracking information from the supplied drives. @@ -221,7 +221,7 @@ func (d *dataUpdateTracker) start(ctx context.Context, drives ...string) { // If object is shared the caller should lock it. func (d *dataUpdateTracker) load(ctx context.Context, drives ...string) { if len(drives) == 0 { - logger.LogIf(ctx, errors.New("dataUpdateTracker.load: No drives specified")) + logger.LogIf(ctx, errors.New("dataUpdateTracker.load: No local drives specified")) return } for _, drive := range drives { @@ -246,7 +246,11 @@ func (d *dataUpdateTracker) load(ctx context.Context, drives ...string) { // startSaver will start a saver that will write d to all supplied drives at specific intervals. // 'd' must be write locked when started and will be unlocked. // The saver will save and exit when supplied context is closed. 
-func (d *dataUpdateTracker) startSaver(ctx context.Context, interval time.Duration, drives []string) { +func (d *dataUpdateTracker) startSaver(ctx context.Context, interval time.Duration, drives ...string) { + if len(drives) == 0 { + return + } + saveNow := d.save exited := make(chan struct{}) d.saveExited = exited diff --git a/cmd/erasure-server-pool.go b/cmd/erasure-server-pool.go index ec2d0292c..39a67ab34 100644 --- a/cmd/erasure-server-pool.go +++ b/cmd/erasure-server-pool.go @@ -72,16 +72,9 @@ func newErasureServerPools(ctx context.Context, endpointServerPools EndpointServ } ) - var localDrives []string - + var localDrives []StorageAPI local := endpointServerPools.FirstLocal() for i, ep := range endpointServerPools { - for _, endpoint := range ep.Endpoints { - if endpoint.IsLocal { - localDrives = append(localDrives, endpoint.Path) - } - } - // If storage class is not set during startup, default values are used // -- Default for Reduced Redundancy Storage class is, parity = 2 // -- Default for Standard Storage class is, parity = 2 - disks 4, 5 @@ -101,6 +94,12 @@ func newErasureServerPools(ctx context.Context, endpointServerPools EndpointServ return nil, err } + for _, storageDisk := range storageDisks[i] { + if storageDisk != nil && storageDisk.IsLocal() { + localDrives = append(localDrives, storageDisk) + } + } + if deploymentID == "" { // all zones should have same deployment ID deploymentID = formats[i].ID @@ -124,7 +123,7 @@ func newErasureServerPools(ctx context.Context, endpointServerPools EndpointServ z.decommissionCancelers = make([]context.CancelFunc, len(z.serverPools)) r := rand.New(rand.NewSource(time.Now().UnixNano())) for { - err := z.Init(ctx) + err := z.Init(ctx) // Initializes all pools. if err != nil { if !configRetriableErrors(err) { logger.Fatal(err, "Unable to initialize backend") @@ -135,8 +134,14 @@ func newErasureServerPools(ctx context.Context, endpointServerPools EndpointServ break } + drives := make([]string, 0, len(localDrives)) + for _, localDrive := range localDrives { + drives = append(drives, localDrive.Endpoint().Path) + } + + globalLocalDrives = localDrives ctx, z.shutdown = context.WithCancel(ctx) - go intDataUpdateTracker.start(ctx, localDrives...) + go intDataUpdateTracker.start(ctx, drives...) return z, nil } @@ -177,7 +182,7 @@ func (z *erasureServerPools) GetRawData(ctx context.Context, volume, file string found := 0 for _, s := range z.serverPools { for _, disks := range s.erasureDisks { - for i, disk := range disks { + for _, disk := range disks { if disk == OfflineDisk { continue } @@ -185,10 +190,6 @@ func (z *erasureServerPools) GetRawData(ctx context.Context, volume, file string if err != nil { continue } - did, err := disk.GetDiskID() - if err != nil { - did = fmt.Sprintf("disk-%d", i) - } for _, si := range stats { found++ var r io.ReadCloser @@ -200,7 +201,9 @@ func (z *erasureServerPools) GetRawData(ctx context.Context, volume, file string } else { r = io.NopCloser(bytes.NewBuffer([]byte{})) } - err = fn(r, disk.Hostname(), did, pathJoin(volume, si.Name), si) + // Keep disk path instead of ID, to ensure that the downloaded zip file can be + // easily automated with `minio server hostname{1...n}/disk{1...m}`. 
+ err = fn(r, disk.Hostname(), disk.Endpoint().Path, pathJoin(volume, si.Name), si) r.Close() if err != nil { return err diff --git a/cmd/erasure-sets.go b/cmd/erasure-sets.go index c6f17af1e..19004634a 100644 --- a/cmd/erasure-sets.go +++ b/cmd/erasure-sets.go @@ -414,44 +414,67 @@ func newErasureSets(ctx context.Context, endpoints PoolEndpoints, storageDisks [ lockerEpSet.Add(endpoint.Host) s.erasureLockers[i] = append(s.erasureLockers[i], locker) } - disk := storageDisks[i*setDriveCount+j] - if disk == nil { - continue - } - diskID, derr := disk.GetDiskID() - if derr != nil { - continue - } - m, n, err := findDiskIndexByDiskID(format, diskID) - if err != nil { - continue - } - if m != i || n != j { - logger.LogIf(GlobalContext, fmt.Errorf("Detected unexpected disk ordering refusing to use the disk - poolID: %s, found disk mounted at (set=%s, disk=%s) expected mount at (set=%s, disk=%s): %s(%s)", humanize.Ordinal(poolIdx+1), humanize.Ordinal(m+1), humanize.Ordinal(n+1), humanize.Ordinal(i+1), humanize.Ordinal(j+1), disk, diskID)) - s.erasureDisks[i][j] = &unrecognizedDisk{storage: disk} - continue - } - disk.SetDiskLoc(s.poolIndex, m, n) - s.endpointStrings[m*setDriveCount+n] = disk.String() - s.erasureDisks[m][n] = disk - } - - // Initialize erasure objects for a given set. - s.sets[i] = &erasureObjects{ - setIndex: i, - poolIndex: poolIdx, - setDriveCount: setDriveCount, - defaultParityCount: defaultParityCount, - getDisks: s.GetDisks(i), - getLockers: s.GetLockers(i), - getEndpoints: s.GetEndpoints(i), - deletedCleanupSleeper: newDynamicSleeper(10, 2*time.Second), - nsMutex: mutex, - bp: bp, - bpOld: bpOld, } } + var wg sync.WaitGroup + for i := 0; i < setCount; i++ { + wg.Add(1) + go func(i int) { + defer wg.Done() + + var innerWg sync.WaitGroup + for j := 0; j < setDriveCount; j++ { + disk := storageDisks[i*setDriveCount+j] + if disk == nil { + continue + } + innerWg.Add(1) + go func(disk StorageAPI, i, j int) { + defer innerWg.Done() + diskID, err := disk.GetDiskID() + if err != nil { + if !errors.Is(err, errUnformattedDisk) { + logger.LogIf(ctx, err) + } + return + } + m, n, err := findDiskIndexByDiskID(format, diskID) + if err != nil { + logger.LogIf(ctx, err) + return + } + if m != i || n != j { + logger.LogIf(ctx, fmt.Errorf("Detected unexpected disk ordering refusing to use the disk - poolID: %s, found disk mounted at (set=%s, disk=%s) expected mount at (set=%s, disk=%s): %s(%s)", humanize.Ordinal(poolIdx+1), humanize.Ordinal(m+1), humanize.Ordinal(n+1), humanize.Ordinal(i+1), humanize.Ordinal(j+1), disk, diskID)) + s.erasureDisks[i][j] = &unrecognizedDisk{storage: disk} + return + } + disk.SetDiskLoc(s.poolIndex, m, n) + s.endpointStrings[m*setDriveCount+n] = disk.String() + s.erasureDisks[m][n] = disk + }(disk, i, j) + } + innerWg.Wait() + + // Initialize erasure objects for a given set. + s.sets[i] = &erasureObjects{ + setIndex: i, + poolIndex: poolIdx, + setDriveCount: setDriveCount, + defaultParityCount: defaultParityCount, + getDisks: s.GetDisks(i), + getLockers: s.GetLockers(i), + getEndpoints: s.GetEndpoints(i), + deletedCleanupSleeper: newDynamicSleeper(10, 2*time.Second), + nsMutex: mutex, + bp: bp, + bpOld: bpOld, + } + }(i) + } + + wg.Wait() + // start cleanup stale uploads go-routine. 
 			go s.cleanupStaleUploads(ctx)
diff --git a/cmd/format-erasure.go b/cmd/format-erasure.go
index a7245ca93..06aa92f2b 100644
--- a/cmd/format-erasure.go
+++ b/cmd/format-erasure.go
@@ -24,6 +24,7 @@ import (
 	"encoding/json"
 	"errors"
 	"fmt"
+	"io/fs"
 	"io/ioutil"
 	"reflect"
 	"sync"
@@ -156,13 +157,9 @@ func newFormatErasureV3(numSets int, setLen int) *formatErasureV3 {
 
 // Returns format Erasure version after reading `format.json`, returns
 // successfully the version only if the backend is Erasure.
-func formatGetBackendErasureVersion(formatPath string) (string, error) {
+func formatGetBackendErasureVersion(b []byte) (string, error) {
 	meta := &formatMetaV1{}
-	b, err := xioutil.ReadFile(formatPath)
-	if err != nil {
-		return "", err
-	}
-	if err = json.Unmarshal(b, meta); err != nil {
+	if err := json.Unmarshal(b, meta); err != nil {
 		return "", err
 	}
 	if meta.Version != formatMetaVersionV1 {
@@ -173,7 +170,7 @@ func formatGetBackendErasureVersion(formatPath string) (string, error) {
 	}
 	// Erasure backend found, proceed to detect version.
 	format := &formatErasureVersionDetect{}
-	if err = json.Unmarshal(b, format); err != nil {
+	if err := json.Unmarshal(b, format); err != nil {
 		return "", err
 	}
 	return format.Erasure.Version, nil
@@ -182,50 +179,63 @@ func formatGetBackendErasureVersion(formatPath string) (string, error) {
 // Migrates all previous versions to latest version of `format.json`,
 // this code calls migrations in sequence, such that V1 is migrated to V2
 // first, before V2 is migrated to V3.
-func formatErasureMigrate(export string) error {
+func formatErasureMigrate(export string) ([]byte, fs.FileInfo, error) {
 	formatPath := pathJoin(export, minioMetaBucket, formatConfigFile)
-	version, err := formatGetBackendErasureVersion(formatPath)
+	formatData, formatFi, err := xioutil.ReadFileWithFileInfo(formatPath)
 	if err != nil {
-		return fmt.Errorf("Disk %s: %w", export, err)
+		return nil, nil, err
 	}
+
+	version, err := formatGetBackendErasureVersion(formatData)
+	if err != nil {
+		return nil, nil, fmt.Errorf("Disk %s: %w", export, err)
+	}
+
+	migrate := func(formatPath string, formatData []byte) ([]byte, fs.FileInfo, error) {
+		if err = ioutil.WriteFile(formatPath, formatData, 0o666); err != nil {
+			return nil, nil, err
+		}
+		formatFi, err := Lstat(formatPath)
+		if err != nil {
+			return nil, nil, err
+		}
+		return formatData, formatFi, nil
+	}
+
 	switch version {
 	case formatErasureVersionV1:
-		if err = formatErasureMigrateV1ToV2(export, version); err != nil {
-			return fmt.Errorf("Disk %s: %w", export, err)
+		formatData, err = formatErasureMigrateV1ToV2(formatData, version)
+		if err != nil {
+			return nil, nil, fmt.Errorf("Disk %s: %w", export, err)
 		}
 		// Migrate successful v1 => v2, proceed to v2 => v3
 		version = formatErasureVersionV2
 		fallthrough
 	case formatErasureVersionV2:
-		if err = formatErasureMigrateV2ToV3(export, version); err != nil {
-			return fmt.Errorf("Disk %s: %w", export, err)
+		formatData, err = formatErasureMigrateV2ToV3(formatData, export, version)
+		if err != nil {
+			return nil, nil, fmt.Errorf("Disk %s: %w", export, err)
		}
 		// Migrate successful v2 => v3, v3 is latest
 		// version = formatXLVersionV3
-		fallthrough
+		return migrate(formatPath, formatData)
 	case formatErasureVersionV3:
 		// v3 is the latest version, return.
- return nil + return formatData, formatFi, nil } - return fmt.Errorf(`Disk %s: unknown format version %s`, export, version) + return nil, nil, fmt.Errorf(`Disk %s: unknown format version %s`, export, version) } // Migrates version V1 of format.json to version V2 of format.json, // migration fails upon any error. -func formatErasureMigrateV1ToV2(export, version string) error { +func formatErasureMigrateV1ToV2(data []byte, version string) ([]byte, error) { if version != formatErasureVersionV1 { - return fmt.Errorf(`format version expected %s, found %s`, formatErasureVersionV1, version) + return nil, fmt.Errorf(`format version expected %s, found %s`, formatErasureVersionV1, version) } - formatPath := pathJoin(export, minioMetaBucket, formatConfigFile) - formatV1 := &formatErasureV1{} - b, err := xioutil.ReadFile(formatPath) - if err != nil { - return err - } - if err = json.Unmarshal(b, formatV1); err != nil { - return err + if err := json.Unmarshal(data, formatV1); err != nil { + return nil, err } formatV2 := &formatErasureV2{} @@ -238,53 +248,38 @@ func formatErasureMigrateV1ToV2(export, version string) error { formatV2.Erasure.Sets[0] = make([]string, len(formatV1.Erasure.JBOD)) copy(formatV2.Erasure.Sets[0], formatV1.Erasure.JBOD) - b, err = json.Marshal(formatV2) - if err != nil { - return err - } - return ioutil.WriteFile(formatPath, b, 0o666) + return json.Marshal(formatV2) } // Migrates V2 for format.json to V3 (Flat hierarchy for multipart) -func formatErasureMigrateV2ToV3(export, version string) error { +func formatErasureMigrateV2ToV3(data []byte, export, version string) ([]byte, error) { if version != formatErasureVersionV2 { - return fmt.Errorf(`format version expected %s, found %s`, formatErasureVersionV2, version) + return nil, fmt.Errorf(`format version expected %s, found %s`, formatErasureVersionV2, version) } - formatPath := pathJoin(export, minioMetaBucket, formatConfigFile) formatV2 := &formatErasureV2{} - b, err := xioutil.ReadFile(formatPath) - if err != nil { - return err - } - err = json.Unmarshal(b, formatV2) - if err != nil { - return err + if err := json.Unmarshal(data, formatV2); err != nil { + return nil, err } - if err = removeAll(pathJoin(export, minioMetaMultipartBucket)); err != nil { - return err - } - - if err = mkdirAll(pathJoin(export, minioMetaMultipartBucket), 0o755); err != nil { - return err + tmpOld := pathJoin(export, minioMetaTmpDeletedBucket, mustGetUUID()) + if err := renameAll(pathJoin(export, minioMetaMultipartBucket), + tmpOld); err != nil && err != errFileNotFound { + logger.LogIf(GlobalContext, fmt.Errorf("unable to rename (%s -> %s) %w, drive may be faulty please investigate", + pathJoin(export, minioMetaMultipartBucket), + tmpOld, + osErrToFileErr(err))) } // format-V2 struct is exactly same as format-V1 except that version is "3" // which indicates the simplified multipart backend. formatV3 := formatErasureV3{} - formatV3.Version = formatV2.Version formatV3.Format = formatV2.Format formatV3.Erasure = formatV2.Erasure - formatV3.Erasure.Version = formatErasureVersionV3 - b, err = json.Marshal(formatV3) - if err != nil { - return err - } - return ioutil.WriteFile(formatPath, b, 0o666) + return json.Marshal(formatV3) } // countErrs - count a specific error. 
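The refactor above turns each migration step into a pure `[]byte -> []byte` transform; only `formatErasureMigrate` touches the drive, via its `migrate` closure, once the chain has finished. A minimal sketch of that shape, assuming simplified stand-ins (`meta` and `bump` below are hypothetical, not the real `formatErasureV*` types):

```go
package main

import (
	"encoding/json"
	"fmt"
	"os"
)

// meta is a hypothetical, trimmed-down stand-in for MinIO's format
// metadata; only the version field matters for the migration chain.
type meta struct {
	Version string `json:"version"`
	Format  string `json:"format"`
}

// bump unmarshals, rewrites the version, and re-marshals: a pure
// in-memory step, mirroring formatErasureMigrateV1ToV2/V2ToV3.
func bump(data []byte, from, to string) ([]byte, error) {
	var m meta
	if err := json.Unmarshal(data, &m); err != nil {
		return nil, err
	}
	if m.Version != from {
		return nil, fmt.Errorf("format version expected %s, found %s", from, m.Version)
	}
	m.Version = to
	return json.Marshal(&m)
}

func main() {
	data := []byte(`{"version":"1","format":"xl"}`)
	var err error
	// Chain the steps in memory; persist exactly once at the end,
	// so a failed step never leaves a half-migrated format.json.
	for _, step := range [][2]string{{"1", "2"}, {"2", "3"}} {
		if data, err = bump(data, step[0], step[1]); err != nil {
			fmt.Fprintln(os.Stderr, err)
			return
		}
	}
	if err := os.WriteFile("format.json", data, 0o666); err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	fmt.Printf("migrated: %s\n", data)
}
```

The single write at the end is also what lets the caller keep the final bytes and FileInfo, which `newXLStorage` caches later in this diff.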
@@ -382,25 +377,6 @@ func saveFormatErasure(disk StorageAPI, format *formatErasureV3, heal bool) erro return nil } -var ignoredHiddenDirectories = map[string]struct{}{ - minioMetaBucket: {}, // metabucket '.minio.sys' - ".minio": {}, // users may choose to double down the backend as the config folder for certs - ".snapshot": {}, // .snapshot for ignoring NetApp based persistent volumes WAFL snapshot - "lost+found": {}, // 'lost+found' directory default on ext4 filesystems - "$RECYCLE.BIN": {}, // windows specific directory for each drive (hidden) - "System Volume Information": {}, // windows specific directory for each drive (hidden) -} - -func isHiddenDirectories(vols ...VolInfo) bool { - for _, vol := range vols { - if _, ok := ignoredHiddenDirectories[vol.Name]; ok { - continue - } - return false - } - return true -} - // loadFormatErasure - loads format.json from disk. func loadFormatErasure(disk StorageAPI) (format *formatErasureV3, err error) { buf, err := disk.ReadAll(context.TODO(), minioMetaBucket, formatConfigFile) @@ -408,17 +384,6 @@ func loadFormatErasure(disk StorageAPI) (format *formatErasureV3, err error) { // 'file not found' and 'volume not found' as // same. 'volume not found' usually means its a fresh disk. if err == errFileNotFound || err == errVolumeNotFound { - var vols []VolInfo - vols, err = disk.ListVols(context.TODO()) - if err != nil { - return nil, err - } - if !isHiddenDirectories(vols...) { - // 'format.json' not found, but we found user data, reject such disks. - return nil, fmt.Errorf("some unexpected files '%v' found on %s: %w", - vols, disk, errCorruptedFormat) - } - // No other data found, its a fresh disk. return nil, errUnformattedDisk } return nil, err @@ -718,12 +683,18 @@ func saveFormatErasureAll(ctx context.Context, storageDisks []StorageAPI, format // relinquishes the underlying connection for all storage disks. func closeStorageDisks(storageDisks []StorageAPI) { + var wg sync.WaitGroup for _, disk := range storageDisks { if disk == nil { continue } - disk.Close() + wg.Add(1) + go func(disk StorageAPI) { + defer wg.Done() + disk.Close() + }(disk) } + wg.Wait() } func initStorageDisksWithErrorsWithoutHealthCheck(endpoints Endpoints) ([]StorageAPI, []error) { @@ -897,13 +868,10 @@ func makeFormatErasureMetaVolumes(disk StorageAPI) error { return errDiskNotFound } volumes := []string{ - minioMetaBucket, - minioMetaTmpBucket, - minioMetaMultipartBucket, - minioMetaTmpDeletedBucket, - dataUsageBucket, - pathJoin(minioMetaBucket, minioConfigPrefix), - minioMetaTmpBucket + "-old", + minioMetaTmpDeletedBucket, // creates .minio.sys/tmp as well as .minio.sys/tmp/.trash + minioMetaMultipartBucket, // creates .minio.sys/multipart + dataUsageBucket, // creates .minio.sys/buckets + minioConfigBucket, // creates .minio.sys/config } // Attempt to create MinIO internal buckets. return disk.MakeVolBulk(context.TODO(), volumes...) 
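The trimmed volume list in `makeFormatErasureMetaVolumes` works because creating the deepest path also creates its parents, so `.minio.sys` and `.minio.sys/tmp` come for free. A quick illustration with plain `os.MkdirAll` (directory names mirror the comments above; this is an illustration of the idea, not MinIO's `MakeVolBulk` itself):

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

func main() {
	root, err := os.MkdirTemp("", "drive")
	if err != nil {
		panic(err)
	}
	defer os.RemoveAll(root)

	// Only the deepest paths are listed; MkdirAll creates parents,
	// so ".minio.sys" and ".minio.sys/tmp" are created implicitly.
	for _, vol := range []string{
		".minio.sys/tmp/.trash",
		".minio.sys/multipart",
		".minio.sys/buckets",
		".minio.sys/config",
	} {
		if err := os.MkdirAll(filepath.Join(root, vol), 0o777); err != nil {
			panic(err)
		}
	}

	entries, err := os.ReadDir(filepath.Join(root, ".minio.sys"))
	if err != nil {
		panic(err)
	}
	for _, e := range entries {
		fmt.Println(e.Name()) // buckets, config, multipart, tmp
	}
}
```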
diff --git a/cmd/format-erasure_test.go b/cmd/format-erasure_test.go index d36b5f56e..2a61e3401 100644 --- a/cmd/format-erasure_test.go +++ b/cmd/format-erasure_test.go @@ -132,11 +132,12 @@ func TestFormatErasureMigrate(t *testing.T) { t.Fatal(err) } - if err = formatErasureMigrate(rootPath); err != nil { + formatData, _, err := formatErasureMigrate(rootPath) + if err != nil { t.Fatal(err) } - migratedVersion, err := formatGetBackendErasureVersion(pathJoin(rootPath, minioMetaBucket, formatConfigFile)) + migratedVersion, err := formatGetBackendErasureVersion(formatData) if err != nil { t.Fatal(err) } @@ -179,7 +180,7 @@ func TestFormatErasureMigrate(t *testing.T) { t.Fatal(err) } - if err = formatErasureMigrate(rootPath); err == nil { + if _, _, err = formatErasureMigrate(rootPath); err == nil { t.Fatal("Expected to fail with unexpected backend format") } @@ -199,7 +200,7 @@ func TestFormatErasureMigrate(t *testing.T) { t.Fatal(err) } - if err = formatErasureMigrate(rootPath); err == nil { + if _, _, err = formatErasureMigrate(rootPath); err == nil { t.Fatal("Expected to fail with unexpected backend format version number") } } diff --git a/cmd/globals.go b/cmd/globals.go index 1f21e1a81..fc14298fa 100644 --- a/cmd/globals.go +++ b/cmd/globals.go @@ -338,6 +338,9 @@ var ( globalServiceFreezeCnt int32 globalServiceFreezeMu sync.Mutex // Updates. + // List of local drives to this node, this is only set during server startup. + globalLocalDrives []StorageAPI + // Add new variable global values here. ) diff --git a/cmd/prepare-storage.go b/cmd/prepare-storage.go index 0d1ebb5d1..dee1f675c 100644 --- a/cmd/prepare-storage.go +++ b/cmd/prepare-storage.go @@ -20,6 +20,7 @@ package cmd import ( "context" "crypto/tls" + "errors" "fmt" "net/http" "net/url" @@ -70,7 +71,7 @@ var printEndpointError = func() func(Endpoint, error, bool) { }() // Cleans up tmp directory of the local disk. -func formatErasureCleanupTmp(diskPath string) error { +func formatErasureCleanupTmp(diskPath string) { // Need to move temporary objects left behind from previous run of minio // server to a unique directory under `minioMetaTmpBucket-old` to clean // up `minioMetaTmpBucket` for the current run. @@ -81,9 +82,23 @@ func formatErasureCleanupTmp(diskPath string) error { // // In this example, `33a58b40-aecc-4c9f-a22f-ff17bfa33b62` directory contains // temporary objects from one of the previous runs of minio server. 
-	tmpOld := pathJoin(diskPath, minioMetaTmpBucket+"-old", mustGetUUID())
+	tmpID := mustGetUUID()
+	tmpOld := pathJoin(diskPath, minioMetaTmpBucket+"-old", tmpID)
 	if err := renameAll(pathJoin(diskPath, minioMetaTmpBucket),
-		tmpOld); err != nil && err != errFileNotFound {
+		tmpOld); err != nil && !errors.Is(err, errFileNotFound) {
+		logger.LogIf(GlobalContext, fmt.Errorf("unable to rename (%s -> %s) %w, drive may be faulty please investigate",
+			pathJoin(diskPath, minioMetaTmpBucket),
+			tmpOld,
+			osErrToFileErr(err)))
+	}
+
+	if err := mkdirAll(pathJoin(diskPath, minioMetaTmpDeletedBucket), 0o777); err != nil {
+		logger.LogIf(GlobalContext, fmt.Errorf("unable to create (%s) %w, drive may be faulty please investigate",
+			pathJoin(diskPath, minioMetaTmpDeletedBucket),
+			err))
+	}
+
+	if err := renameAll(tmpOld, pathJoin(diskPath, minioMetaTmpDeletedBucket, tmpID)); err != nil && !errors.Is(err, errFileNotFound) {
 		logger.LogIf(GlobalContext, fmt.Errorf("unable to rename (%s -> %s) %w, drive may be faulty please investigate",
 			pathJoin(diskPath, minioMetaTmpBucket),
 			tmpOld,
@@ -92,16 +107,6 @@
 	// Renames and schedules for purging all bucket metacache.
 	renameAllBucketMetacache(diskPath)
-
-	// Removal of tmp-old folder is backgrounded completely.
-	go removeAll(pathJoin(diskPath, minioMetaTmpBucket+"-old"))
-
-	if err := mkdirAll(pathJoin(diskPath, minioMetaTmpDeletedBucket), 0o777); err != nil {
-		logger.LogIf(GlobalContext, fmt.Errorf("unable to create (%s) %w, drive may be faulty please investigate",
-			pathJoin(diskPath, minioMetaTmpBucket),
-			err))
-	}
-	return nil
 }
 
 // Following error message is added to fix a regression in release
@@ -178,19 +183,6 @@ func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints,
 		}
 	}(storageDisks)
 
-	// Sanitize all local disks during server startup.
-	var wg sync.WaitGroup
-	for _, disk := range storageDisks {
-		if disk != nil && disk.IsLocal() {
-			wg.Add(1)
-			go func(disk StorageAPI) {
-				defer wg.Done()
-				disk.(*xlStorageDiskIDCheck).storage.(*xlStorage).Sanitize()
-			}(disk)
-		}
-	}
-	wg.Wait()
-
 	for i, err := range errs {
 		if err != nil {
 			if err == errDiskNotFound && retryCount >= 5 {
@@ -298,10 +290,6 @@ func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints,
 		return nil, nil, err
 	}
 
-	// This will always recreate some directories inside .minio.sys of
-	// the local disk such as tmp, multipart and background-ops
-	initErasureMetaVolumesInLocalDisks(storageDisks, formatConfigs)
-
 	return storageDisks, format, nil
 }
diff --git a/cmd/tree-walk_test.go b/cmd/tree-walk_test.go
index f99efff15..3a6f5c944 100644
--- a/cmd/tree-walk_test.go
+++ b/cmd/tree-walk_test.go
@@ -141,6 +141,8 @@ func TestTreeWalk(t *testing.T) {
 	if err != nil {
 		t.Fatalf("Unable to create tmp directory: %s", err)
 	}
+	defer os.RemoveAll(fsDir)
+
 	endpoints := mustGetNewEndpoints(fsDir)
 	disk, err := newStorageAPI(endpoints[0])
 	if err != nil {
@@ -175,11 +177,6 @@ func TestTreeWalk(t *testing.T) {
 
 	// Simple test when marker is set.
 	testTreeWalkMarker(t, listDir, isLeaf, isLeafDir)
-
-	err = os.RemoveAll(fsDir)
-	if err != nil {
-		t.Fatal(err)
-	}
 }
 
 // Test if tree walk go-routine exits cleanly if tree walk is aborted because of timeout.
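The tree-walk test edits here all apply one pattern: the trailing `os.RemoveAll` is replaced with a `defer` registered immediately after the directory is created, so cleanup also runs when the test bails out early through `t.Fatal`. A sketch of the pattern (the test name is illustrative):

```go
package cmd

import (
	"os"
	"testing"
)

func TestCleanupPattern(t *testing.T) {
	fsDir, err := os.MkdirTemp("", "minio-")
	if err != nil {
		t.Fatalf("Unable to create tmp directory: %s", err)
	}
	// Registered up front: runs on success, failure, or t.Fatal alike,
	// unlike a trailing os.RemoveAll that a failed assertion skips.
	defer os.RemoveAll(fsDir)

	// ... test body using fsDir ...
}
```

On Go 1.15 and later, `t.TempDir()` gives the same guarantee without the explicit defer.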
@@ -188,6 +185,7 @@ func TestTreeWalkTimeout(t *testing.T) { if err != nil { t.Fatalf("Unable to create tmp directory: %s", err) } + defer os.RemoveAll(fsDir) endpoints := mustGetNewEndpoints(fsDir) disk, err := newStorageAPI(endpoints[0]) if err != nil { @@ -250,10 +248,6 @@ func TestTreeWalkTimeout(t *testing.T) { if ok { t.Error("Tree-walk go routine has not exited after timeout.") } - err = os.RemoveAll(fsDir) - if err != nil { - t.Error(err) - } } // TestRecursiveWalk - tests if treeWalk returns entries correctly with and @@ -264,6 +258,7 @@ func TestRecursiveTreeWalk(t *testing.T) { if err != nil { t.Fatalf("Unable to create tmp directory: %s", err) } + defer os.RemoveAll(fsDir1) endpoints := mustGetNewEndpoints(fsDir1) disk1, err := newStorageAPI(endpoints[0]) @@ -366,10 +361,6 @@ func TestRecursiveTreeWalk(t *testing.T) { } }) } - err = os.RemoveAll(fsDir1) - if err != nil { - t.Error(err) - } } func TestSortedness(t *testing.T) { @@ -378,6 +369,7 @@ func TestSortedness(t *testing.T) { if err != nil { t.Errorf("Unable to create tmp directory: %s", err) } + defer os.RemoveAll(fsDir1) endpoints := mustGetNewEndpoints(fsDir1) disk1, err := newStorageAPI(endpoints[0]) @@ -444,12 +436,6 @@ func TestSortedness(t *testing.T) { t.Error(i+1, "Expected entries to be sort, but it wasn't") } } - - // Remove directory created for testing - err = os.RemoveAll(fsDir1) - if err != nil { - t.Error(err) - } } func TestTreeWalkIsEnd(t *testing.T) { @@ -458,6 +444,7 @@ func TestTreeWalkIsEnd(t *testing.T) { if err != nil { t.Errorf("Unable to create tmp directory: %s", err) } + defer os.RemoveAll(fsDir1) endpoints := mustGetNewEndpoints(fsDir1) disk1, err := newStorageAPI(endpoints[0]) @@ -526,10 +513,4 @@ func TestTreeWalkIsEnd(t *testing.T) { t.Errorf("Test %d: Last entry %s, doesn't have EOF marker set", i, entry.entry) } } - - // Remove directory created for testing - err = os.RemoveAll(fsDir1) - if err != nil { - t.Error(err) - } } diff --git a/cmd/xl-storage.go b/cmd/xl-storage.go index 196edc9fe..e7439152b 100644 --- a/cmd/xl-storage.go +++ b/cmd/xl-storage.go @@ -110,6 +110,8 @@ type xlStorage struct { diskInfoCache timedValue sync.RWMutex + formatData []byte + // mutex to prevent concurrent read operations overloading walks. walkMu sync.Mutex walkReadMu sync.Mutex @@ -205,23 +207,9 @@ func newLocalXLStorage(path string) (*xlStorage, error) { }) } -// Sanitize - sanitizes the `format.json`, cleanup tmp. -// all other future cleanups should be added here. -func (s *xlStorage) Sanitize() error { - if err := formatErasureMigrate(s.diskPath); err != nil && !errors.Is(err, os.ErrNotExist) { - return err - } - - // Create any missing paths. - makeFormatErasureMetaVolumes(s) - - return formatErasureCleanupTmp(s.diskPath) -} - // Initialize a new storage disk. -func newXLStorage(ep Endpoint) (*xlStorage, error) { +func newXLStorage(ep Endpoint) (s *xlStorage, err error) { path := ep.Path - var err error if path, err = getValidPath(path); err != nil { return nil, err } @@ -255,7 +243,7 @@ func newXLStorage(ep Endpoint) (*xlStorage, error) { } } - p := &xlStorage{ + s = &xlStorage{ diskPath: path, endpoint: ep, globalSync: env.Get(config.EnvFSOSync, config.EnableOff) == config.EnableOn, @@ -265,42 +253,56 @@ func newXLStorage(ep Endpoint) (*xlStorage, error) { diskIndex: -1, } + go formatErasureCleanupTmp(s.diskPath) // cleanup any old data. 
+
+	formatData, formatFi, err := formatErasureMigrate(s.diskPath)
+	if err != nil && !errors.Is(err, os.ErrNotExist) {
+		if os.IsPermission(err) {
+			return nil, errDiskAccessDenied
+		} else if isSysErrIO(err) {
+			return nil, errFaultyDisk
+		}
+		return nil, err
+	}
+	s.formatData = formatData
+	s.formatFileInfo = formatFi
+
+	if len(s.formatData) == 0 { // Unformatted disk, check if O_DIRECT is supported.
+		// Check if backend is writable and supports O_DIRECT
+		var rnd [32]byte
+		_, _ = rand.Read(rnd[:])
+		filePath := pathJoin(s.diskPath, ".writable-check-"+hex.EncodeToString(rnd[:])+".tmp")
+		w, err := s.openFileDirect(filePath, os.O_CREATE|os.O_WRONLY|os.O_EXCL)
+		if err != nil {
+			return s, err
+		}
+		_, err = w.Write(alignedBuf)
+		w.Close()
+		if err != nil {
+			if isSysErrInvalidArg(err) {
+				return s, errUnsupportedDisk
+			}
+			return s, err
+		}
+		Remove(filePath)
+	} else {
+		format := &formatErasureV3{}
+		json := jsoniter.ConfigCompatibleWithStandardLibrary
+		if err = json.Unmarshal(s.formatData, &format); err != nil {
+			return s, errCorruptedFormat
+		}
+		s.diskID = format.Erasure.This
+		s.formatLastCheck = time.Now()
+		s.formatLegacy = format.Erasure.DistributionAlgo == formatErasureVersionV2DistributionAlgoV1
+	}
+
 	// Create all necessary bucket folders if possible.
-	if err = p.MakeVolBulk(context.TODO(), minioMetaBucket, minioMetaTmpBucket, minioMetaMultipartBucket, dataUsageBucket, minioMetaSpeedTestBucket); err != nil {
+	if err = makeFormatErasureMetaVolumes(s); err != nil {
 		return nil, err
 	}
 
-	// Check if backend is writable and supports O_DIRECT
-	var rnd [8]byte
-	_, _ = rand.Read(rnd[:])
-	tmpFile := ".writable-check-" + hex.EncodeToString(rnd[:]) + ".tmp"
-	filePath := pathJoin(p.diskPath, minioMetaTmpBucket, tmpFile)
-	w, err := OpenFileDirectIO(filePath, os.O_CREATE|os.O_WRONLY|os.O_EXCL, 0o666)
-	if err != nil {
-		switch {
-		case isSysErrInvalidArg(err):
-			return p, errUnsupportedDisk
-		case osIsPermission(err):
-			return p, errDiskAccessDenied
-		case isSysErrIO(err):
-			return p, errFaultyDisk
-		case isSysErrNotDir(err):
-			return p, errDiskNotDir
-		}
-		return p, err
-	}
-	if _, err = w.Write(alignedBuf); err != nil {
-		w.Close()
-		if isSysErrInvalidArg(err) {
-			return p, errUnsupportedDisk
-		}
-		return p, err
-	}
-	w.Close()
-	Remove(filePath)
-	// Success.
-	return p, nil
+	return s, nil
 }
 
 // getDiskInfo returns given disk information.
@@ -308,7 +310,6 @@ func getDiskInfo(diskPath string) (di disk.Info, err error) {
 	if err = checkPathLength(diskPath); err == nil {
 		di, err = disk.GetInfo(diskPath)
 	}
-
 	switch {
 	case osIsNotExist(err):
 		err = errDiskNotFound
@@ -371,7 +372,7 @@ func (s *xlStorage) SetDiskLoc(poolIdx, setIdx, diskIdx int) {
 func (s *xlStorage) Healing() *healingTracker {
 	healingFile := pathJoin(s.diskPath, minioMetaBucket,
 		bucketMetaPrefix, healingTrackerFilename)
-	b, err := xioutil.ReadFile(healingFile)
+	b, err := ioutil.ReadFile(healingFile)
 	if err != nil {
 		return nil
 	}
@@ -624,8 +625,8 @@ func (s *xlStorage) GetDiskID() (string, error) {
 	fileInfo := s.formatFileInfo
 	lastCheck := s.formatLastCheck
 
-	// check if we have a valid disk ID that is less than 1 second old.
-	if fileInfo != nil && diskID != "" && time.Since(lastCheck) <= time.Second {
+	// check if we have a valid disk ID that is less than 1 second old.
+	if fileInfo != nil && diskID != "" && time.Since(lastCheck) <= 1*time.Second {
 		s.RUnlock()
 		return diskID, nil
 	}
@@ -645,7 +646,7 @@ func (s *xlStorage) GetDiskID() (string, error) {
 	}
 
 	formatFile := pathJoin(s.diskPath, minioMetaBucket, formatConfigFile)
-	b, err := xioutil.ReadFile(formatFile)
+	b, err := ioutil.ReadFile(formatFile)
 	if err != nil {
 		// If the disk is still not initialized.
 		if osIsNotExist(err) {
@@ -676,6 +677,7 @@ func (s *xlStorage) GetDiskID() (string, error) {
 
 	s.Lock()
 	defer s.Unlock()
+	s.formatData = b
 	s.diskID = format.Erasure.This
 	s.formatLegacy = format.Erasure.DistributionAlgo == formatErasureVersionV2DistributionAlgoV1
 	s.formatFileInfo = fi
@@ -1417,6 +1419,16 @@ func (s *xlStorage) readAllData(ctx context.Context, volumeDir string, filePath
 // This API is meant to be used on files which have small memory footprint, do
 // not use this on large files as it would cause server to crash.
 func (s *xlStorage) ReadAll(ctx context.Context, volume string, path string) (buf []byte, err error) {
+	// Specific optimization to avoid re-reading `format.json` from the drive
+	// in case the caller is a network operation.
+	if volume == minioMetaBucket && path == formatConfigFile {
+		s.RLock()
+		formatData := s.formatData
+		s.RUnlock()
+		if len(formatData) > 0 {
+			return formatData, nil
+		}
+	}
 	volumeDir, err := s.getVolDir(volume)
 	if err != nil {
 		return nil, err
@@ -1537,6 +1549,30 @@ func (s *xlStorage) ReadFile(ctx context.Context, volume string, path string, of
 	return int64(len(buffer)), nil
 }
 
+func (s *xlStorage) openFileDirect(path string, mode int) (f *os.File, err error) {
+	// Create top level directories if they don't exist.
+	// with mode 0o777 mkdir honors system umask.
+	mkdirAll(pathutil.Dir(path), 0o777) // don't need to fail here
+
+	w, err := OpenFileDirectIO(path, mode, 0o666)
+	if err != nil {
+		switch {
+		case isSysErrInvalidArg(err):
+			return nil, errUnsupportedDisk
+		case osIsPermission(err):
+			return nil, errDiskAccessDenied
+		case isSysErrIO(err):
+			return nil, errFaultyDisk
+		case isSysErrNotDir(err):
+			return nil, errDiskNotDir
+		case os.IsNotExist(err):
+			return nil, errDiskNotFound
+		}
+	}
+
+	// Propagate unclassified open errors instead of silently returning a nil file.
+	return w, err
+}
+
 func (s *xlStorage) openFileSync(filePath string, mode int) (f *os.File, err error) {
 	// Create top level directories if they don't exist.
 	// with mode 0777 mkdir honors system umask.
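The `ReadAll` fast path above answers `format.json` reads from the bytes cached in `xlStorage.formatData` and only falls back to the drive when the cache is empty. A self-contained sketch of that read-mostly shape (the `cachedFormat` type is hypothetical; in MinIO the field lives on `xlStorage` behind its RWMutex):

```go
package main

import (
	"fmt"
	"os"
	"sync"
)

// cachedFormat mimics the xlStorage fast path: format.json is kept in
// memory after startup so remote ReadAll calls skip the drive entirely.
type cachedFormat struct {
	mu   sync.RWMutex
	path string
	data []byte
}

func (c *cachedFormat) ReadAll() ([]byte, error) {
	c.mu.RLock()
	data := c.data
	c.mu.RUnlock()
	if len(data) > 0 {
		return data, nil // served from memory, no disk I/O
	}

	// Cache miss: read from the drive and remember the result.
	data, err := os.ReadFile(c.path)
	if err != nil {
		return nil, err
	}
	c.mu.Lock()
	c.data = data
	c.mu.Unlock()
	return data, nil
}

func main() {
	c := &cachedFormat{path: "format.json"}
	if data, err := c.ReadAll(); err == nil {
		fmt.Printf("%d bytes served\n", len(data))
	}
}
```

Since remote storage-REST reads of `format.json` funnel through `ReadAll`, this one check removes a disk read from a hot startup path, which is the stated intent of the optimization.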
diff --git a/cmd/xl-storage_test.go b/cmd/xl-storage_test.go
index 04ef07664..b5ebd9972 100644
--- a/cmd/xl-storage_test.go
+++ b/cmd/xl-storage_test.go
@@ -128,13 +128,14 @@ func newXLStorageTestSetup() (*xlStorageDiskIDCheck, string, error) {
 	if err != nil {
 		return nil, "", err
 	}
+
 	// Create a sample format.json file
-	err = storage.WriteAll(context.Background(), minioMetaBucket, formatConfigFile, []byte(`{"version":"1","format":"xl","id":"592a41c2-b7cc-4130-b883-c4b5cb15965b","xl":{"version":"3","this":"da017d62-70e3-45f1-8a1a-587707e69ad1","sets":[["e07285a6-8c73-4962-89c6-047fb939f803","33b8d431-482d-4376-b63c-626d229f0a29","cff6513a-4439-4dc1-bcaa-56c9e880c352","da017d62-70e3-45f1-8a1a-587707e69ad1","9c9f21d5-1f15-4737-bce6-835faa0d9626","0a59b346-1424-4fc2-9fa2-a2e80541d0c1","7924a3dc-b69a-4971-9a2e-014966d6aebb","4d2b8dd9-4e48-444b-bdca-c89194b26042"]],"distributionAlgo":"CRCMOD"}}`))
-	if err != nil {
+	if err = storage.WriteAll(context.Background(), minioMetaBucket, formatConfigFile, []byte(`{"version":"1","format":"xl","id":"592a41c2-b7cc-4130-b883-c4b5cb15965b","xl":{"version":"3","this":"da017d62-70e3-45f1-8a1a-587707e69ad1","sets":[["e07285a6-8c73-4962-89c6-047fb939f803","33b8d431-482d-4376-b63c-626d229f0a29","cff6513a-4439-4dc1-bcaa-56c9e880c352","da017d62-70e3-45f1-8a1a-587707e69ad1","9c9f21d5-1f15-4737-bce6-835faa0d9626","0a59b346-1424-4fc2-9fa2-a2e80541d0c1","7924a3dc-b69a-4971-9a2e-014966d6aebb","4d2b8dd9-4e48-444b-bdca-c89194b26042"]],"distributionAlgo":"CRCMOD"}}`)); err != nil {
 		return nil, "", err
 	}
+
 	disk := newXLStorageDiskIDCheck(storage)
-	disk.diskID = "da017d62-70e3-45f1-8a1a-587707e69ad1"
+	disk.SetDiskID("da017d62-70e3-45f1-8a1a-587707e69ad1")
 	return disk, diskPath, nil
 }
diff --git a/internal/ioutil/ioutil.go b/internal/ioutil/ioutil.go
index 0623e0dce..59ec15116 100644
--- a/internal/ioutil/ioutil.go
+++ b/internal/ioutil/ioutil.go
@@ -238,10 +238,7 @@ func SameFile(fi1, fi2 os.FileInfo) bool {
 	if fi1.Mode() != fi2.Mode() {
 		return false
 	}
-	if fi1.Size() != fi2.Size() {
-		return false
-	}
-	return true
+	return fi1.Size() == fi2.Size()
 }
 
 // DirectioAlignSize - DirectIO alignment needs to be 4K. Defined here as
diff --git a/internal/ioutil/read_file.go b/internal/ioutil/read_file.go
index cf487f760..36eed94bd 100644
--- a/internal/ioutil/read_file.go
+++ b/internal/ioutil/read_file.go
@@ -19,11 +19,33 @@ package ioutil
 
 import (
 	"io"
+	"io/fs"
 	"os"
 
 	"github.com/minio/minio/internal/disk"
 )
 
+// ReadFileWithFileInfo reads the named file and returns the contents.
+// A successful call returns err == nil, not err == EOF.
+// Because it reads the whole file, it does not treat an EOF from Read
+// as an error to be reported; it additionally returns the file's os.FileInfo.
+func ReadFileWithFileInfo(name string) ([]byte, fs.FileInfo, error) {
+	f, err := os.Open(name)
+	if err != nil {
+		return nil, nil, err
+	}
+	defer f.Close()
+
+	st, err := f.Stat()
+	if err != nil {
+		return nil, nil, err
+	}
+
+	dst := make([]byte, st.Size())
+	_, err = io.ReadFull(f, dst)
+	return dst, st, err
+}
+
 // ReadFile reads the named file and returns the contents.
 // A successful call returns err == nil, not err == EOF.
 // Because ReadFile reads the whole file, it does not treat an EOF from Read
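`ReadFileWithFileInfo` returns the `fs.FileInfo` taken from the same open handle as the bytes, which is what lets callers such as `formatErasureMigrate` and `GetDiskID` later compare infos (e.g. with `ioutil.SameFile`) instead of re-reading the file. A small usage sketch (the helper body is repeated locally so the example stands alone; the file name is illustrative):

```go
package main

import (
	"fmt"
	"io"
	"io/fs"
	"os"
)

// readFileWithFileInfo mirrors the new ioutil helper: one Open, one
// Stat, one ReadFull, so the FileInfo matches the bytes that were read.
func readFileWithFileInfo(name string) ([]byte, fs.FileInfo, error) {
	f, err := os.Open(name)
	if err != nil {
		return nil, nil, err
	}
	defer f.Close()

	st, err := f.Stat()
	if err != nil {
		return nil, nil, err
	}

	buf := make([]byte, st.Size())
	_, err = io.ReadFull(f, buf)
	return buf, st, err
}

func main() {
	data, fi, err := readFileWithFileInfo("format.json")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	// Holding on to fi lets a caller detect on-disk changes later
	// (size/mode/mtime comparisons) without re-reading the contents.
	fmt.Printf("%d bytes, modified %s\n", len(data), fi.ModTime())
}
```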