From 2ef824bbb2eae0dd3d9236058df790cf8e359e9a Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Tue, 20 Apr 2021 10:44:39 -0700 Subject: [PATCH] collapse two distinct calls into single RenameData() call (#12093) This is an optimization by reducing one extra system call, and many network operations. This reduction should increase the performance for small file workloads. --- cmd/bucket-replication.go | 6 +--- cmd/erasure-healing.go | 17 ++++----- cmd/erasure-metadata.go | 31 ----------------- cmd/erasure-multipart.go | 2 +- cmd/erasure-object.go | 62 +++++++-------------------------- cmd/erasure-object_test.go | 2 +- cmd/naughty-disk_test.go | 4 +-- cmd/storage-interface.go | 2 +- cmd/storage-rest-client.go | 11 ++++-- cmd/storage-rest-common.go | 3 +- cmd/storage-rest-server.go | 18 ++++++++-- cmd/xl-storage-disk-id-check.go | 6 ++-- cmd/xl-storage-format-v2.go | 1 + cmd/xl-storage.go | 22 +++++------- 14 files changed, 61 insertions(+), 126 deletions(-) diff --git a/cmd/bucket-replication.go b/cmd/bucket-replication.go index f22c1eb44..df501a413 100644 --- a/cmd/bucket-replication.go +++ b/cmd/bucket-replication.go @@ -1089,10 +1089,6 @@ func scheduleReplication(ctx context.Context, objInfo ObjectInfo, o ObjectLayer, } func scheduleReplicationDelete(ctx context.Context, dv DeletedObjectVersionInfo, o ObjectLayer, sync bool) { - if sync { - replicateDelete(ctx, dv, o) - } else { - globalReplicationPool.queueReplicaDeleteTask(GlobalContext, dv) - } + globalReplicationPool.queueReplicaDeleteTask(GlobalContext, dv) globalReplicationStats.Update(dv.Bucket, 0, replication.Pending, replication.StatusType(""), replication.DeleteReplicationType) } diff --git a/cmd/erasure-healing.go b/cmd/erasure-healing.go index 062c49183..8aadce618 100644 --- a/cmd/erasure-healing.go +++ b/cmd/erasure-healing.go @@ -359,6 +359,7 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s cleanFileInfo := func(fi FileInfo) FileInfo { // Returns a copy of the 'fi' with checksums and parts nil'ed. nfi := fi + nfi.Erasure.Index = 0 nfi.Erasure.Checksums = nil nfi.Parts = nil return nfi @@ -486,24 +487,18 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s defer er.deleteObject(context.Background(), minioMetaTmpBucket, tmpID, len(storageDisks)/2+1) - // Generate and write `xl.meta` generated from other disks. - outDatedDisks, err = writeUniqueFileInfo(ctx, outDatedDisks, minioMetaTmpBucket, tmpID, - partsMetadata, diskCount(outDatedDisks)) - if err != nil { - return result, toObjectErr(err, bucket, object, versionID) - } - // Rename from tmp location to the actual location. for i, disk := range outDatedDisks { if disk == OfflineDisk { continue } + // record the index of the updated disks + partsMetadata[i].Erasure.Index = i + 1 + // Attempt a rename now from healed data to final location. 
- if err = disk.RenameData(ctx, minioMetaTmpBucket, tmpID, partsMetadata[i].DataDir, bucket, object); err != nil { - if err != errIsNotRegular && err != errFileNotFound { - logger.LogIf(ctx, err) - } + if err = disk.RenameData(ctx, minioMetaTmpBucket, tmpID, partsMetadata[i], bucket, object); err != nil { + logger.LogIf(ctx, err) return result, toObjectErr(err, bucket, object) } diff --git a/cmd/erasure-metadata.go b/cmd/erasure-metadata.go index ddbee9ce3..c9a99a6b6 100644 --- a/cmd/erasure-metadata.go +++ b/cmd/erasure-metadata.go @@ -282,37 +282,6 @@ func pickValidFileInfo(ctx context.Context, metaArr []FileInfo, modTime time.Tim return findFileInfoInQuorum(ctx, metaArr, modTime, quorum) } -// Rename metadata content to destination location for each disk concurrently. -func renameFileInfo(ctx context.Context, disks []StorageAPI, srcBucket, srcEntry, dstBucket, dstEntry string, quorum int) ([]StorageAPI, error) { - ignoredErr := []error{errFileNotFound} - - g := errgroup.WithNErrs(len(disks)) - - // Rename file on all underlying storage disks. - for index := range disks { - index := index - g.Go(func() error { - if disks[index] == nil { - return errDiskNotFound - } - if err := disks[index].RenameData(ctx, srcBucket, srcEntry, "", dstBucket, dstEntry); err != nil { - if !IsErrIgnored(err, ignoredErr...) { - return err - } - } - return nil - }, index) - } - - // Wait for all renames to finish. - errs := g.Wait() - - // We can safely allow RenameData errors up to len(er.getDisks()) - writeQuorum - // otherwise return failure. Cleanup successful renames. - err := reduceWriteQuorumErrs(ctx, errs, objectOpIgnoredErrs, quorum) - return evalDisks(disks, errs), err -} - // writeUniqueFileInfo - writes unique `xl.meta` content for each disk concurrently. func writeUniqueFileInfo(ctx context.Context, disks []StorageAPI, bucket, prefix string, files []FileInfo, quorum int) ([]StorageAPI, error) { g := errgroup.WithNErrs(len(disks)) diff --git a/cmd/erasure-multipart.go b/cmd/erasure-multipart.go index dbf76ec43..cc124d819 100644 --- a/cmd/erasure-multipart.go +++ b/cmd/erasure-multipart.go @@ -912,7 +912,7 @@ func (er erasureObjects) CompleteMultipartUpload(ctx context.Context, bucket str // Rename the multipart object to final location. if onlineDisks, err = renameData(ctx, onlineDisks, minioMetaMultipartBucket, uploadIDPath, - fi.DataDir, bucket, object, writeQuorum, nil); err != nil { + partsMetadata, bucket, object, writeQuorum); err != nil { return oi, toObjectErr(err, bucket, object) } diff --git a/cmd/erasure-object.go b/cmd/erasure-object.go index 17c015a49..b12b1cc90 100644 --- a/cmd/erasure-object.go +++ b/cmd/erasure-object.go @@ -95,7 +95,6 @@ func (er erasureObjects) CopyObject(ctx context.Context, srcBucket, srcObject, d return fi.ToObjectInfo(srcBucket, srcObject), toObjectErr(errMethodNotAllowed, srcBucket, srcObject) } - onlineDisks, metaArr = shuffleDisksAndPartsMetadataByIndex(onlineDisks, metaArr, fi) versionID := srcInfo.VersionID if srcInfo.versionOnly { versionID = dstOpts.VersionID @@ -123,28 +122,11 @@ func (er erasureObjects) CopyObject(ctx context.Context, srcBucket, srcObject, d } } - tempObj := mustGetUUID() - - var online int - // Cleanup in case of xl.meta writing failure - defer func() { - if online != len(onlineDisks) { - er.deleteObject(context.Background(), minioMetaTmpBucket, tempObj, writeQuorum) - } - }() - // Write unique `xl.meta` for each disk. 
- if onlineDisks, err = writeUniqueFileInfo(ctx, onlineDisks, minioMetaTmpBucket, tempObj, metaArr, writeQuorum); err != nil { + if _, err = writeUniqueFileInfo(ctx, onlineDisks, srcBucket, srcObject, metaArr, writeQuorum); err != nil { return oi, toObjectErr(err, srcBucket, srcObject) } - // Rename atomically `xl.meta` from tmp location to destination for each disk. - if _, err = renameFileInfo(ctx, onlineDisks, minioMetaTmpBucket, tempObj, srcBucket, srcObject, writeQuorum); err != nil { - return oi, toObjectErr(err, srcBucket, srcObject) - } - - online = countOnlineDisks(onlineDisks) - return fi.ToObjectInfo(srcBucket, srcObject), nil } @@ -525,8 +507,7 @@ func undoRename(disks []StorageAPI, srcBucket, srcEntry, dstBucket, dstEntry str } // Similar to rename but renames data from srcEntry to dstEntry at dataDir -func renameData(ctx context.Context, disks []StorageAPI, srcBucket, srcEntry, dataDir, dstBucket, dstEntry string, writeQuorum int, ignoredErr []error) ([]StorageAPI, error) { - dataDir = retainSlash(dataDir) +func renameData(ctx context.Context, disks []StorageAPI, srcBucket, srcEntry string, metadata []FileInfo, dstBucket, dstEntry string, writeQuorum int) ([]StorageAPI, error) { defer ObjectPathUpdated(pathJoin(srcBucket, srcEntry)) defer ObjectPathUpdated(pathJoin(dstBucket, dstEntry)) @@ -539,12 +520,16 @@ func renameData(ctx context.Context, disks []StorageAPI, srcBucket, srcEntry, da if disks[index] == nil { return errDiskNotFound } - if err := disks[index].RenameData(ctx, srcBucket, srcEntry, dataDir, dstBucket, dstEntry); err != nil { - if !IsErrIgnored(err, ignoredErr...) { - return err - } + // Pick one FileInfo for a disk at index. + fi := metadata[index] + // Assign index when index is initialized + if fi.Erasure.Index == 0 { + fi.Erasure.Index = index + 1 } - return nil + if fi.IsValid() { + return disks[index].RenameData(ctx, srcBucket, srcEntry, fi, dstBucket, dstEntry) + } + return errFileCorrupt }, index) } @@ -554,23 +539,6 @@ func renameData(ctx context.Context, disks []StorageAPI, srcBucket, srcEntry, da // We can safely allow RenameFile errors up to len(er.getDisks()) - writeQuorum // otherwise return failure. Cleanup successful renames. err := reduceWriteQuorumErrs(ctx, errs, objectOpIgnoredErrs, writeQuorum) - if err == errErasureWriteQuorum { - ug := errgroup.WithNErrs(len(disks)) - for index, disk := range disks { - if disk == nil { - continue - } - index := index - ug.Go(func() error { - // Undo all the partial rename operations. - if errs[index] == nil { - _ = disks[index].RenameData(context.Background(), dstBucket, dstEntry, dataDir, srcBucket, srcEntry) - } - return nil - }, index) - } - ug.Wait() - } return evalDisks(disks, errs), err } @@ -820,14 +788,8 @@ func (er erasureObjects) putObject(ctx context.Context, bucket string, object st } } - // Write unique `xl.meta` for each disk. - if onlineDisks, err = writeUniqueFileInfo(ctx, onlineDisks, minioMetaTmpBucket, tempObj, partsMetadata, writeQuorum); err != nil { - logger.LogIf(ctx, err) - return ObjectInfo{}, toObjectErr(err, bucket, object) - } - // Rename the successfully written temporary object to final location. 
- if onlineDisks, err = renameData(ctx, onlineDisks, minioMetaTmpBucket, tempObj, fi.DataDir, bucket, object, writeQuorum, nil); err != nil { + if onlineDisks, err = renameData(ctx, onlineDisks, minioMetaTmpBucket, tempObj, partsMetadata, bucket, object, writeQuorum); err != nil { logger.LogIf(ctx, err) return ObjectInfo{}, toObjectErr(err, bucket, object) } diff --git a/cmd/erasure-object_test.go b/cmd/erasure-object_test.go index 8aace68b7..9faf96ffd 100644 --- a/cmd/erasure-object_test.go +++ b/cmd/erasure-object_test.go @@ -476,7 +476,7 @@ func TestPutObjectNoQuorum(t *testing.T) { // in a 16 disk Erasure setup. The original disks are 'replaced' with // naughtyDisks that fail after 'f' successful StorageAPI method // invocations, where f - [0,4) - for f := 0; f < 4; f++ { + for f := 0; f < 2; f++ { diskErrors := make(map[int]error) for i := 0; i <= f; i++ { diskErrors[i] = nil diff --git a/cmd/naughty-disk_test.go b/cmd/naughty-disk_test.go index 52ee3df80..22e9d9fb5 100644 --- a/cmd/naughty-disk_test.go +++ b/cmd/naughty-disk_test.go @@ -191,11 +191,11 @@ func (d *naughtyDisk) AppendFile(ctx context.Context, volume string, path string return d.disk.AppendFile(ctx, volume, path, buf) } -func (d *naughtyDisk) RenameData(ctx context.Context, srcVolume, srcPath, dataDir, dstVolume, dstPath string) error { +func (d *naughtyDisk) RenameData(ctx context.Context, srcVolume, srcPath string, fi FileInfo, dstVolume, dstPath string) error { if err := d.calcError(); err != nil { return err } - return d.disk.RenameData(ctx, srcVolume, srcPath, dataDir, dstVolume, dstPath) + return d.disk.RenameData(ctx, srcVolume, srcPath, fi, dstVolume, dstPath) } func (d *naughtyDisk) RenameFile(ctx context.Context, srcVolume, srcPath, dstVolume, dstPath string) error { diff --git a/cmd/storage-interface.go b/cmd/storage-interface.go index c7c13eaa6..3c505bf90 100644 --- a/cmd/storage-interface.go +++ b/cmd/storage-interface.go @@ -57,7 +57,7 @@ type StorageAPI interface { WriteMetadata(ctx context.Context, volume, path string, fi FileInfo) error UpdateMetadata(ctx context.Context, volume, path string, fi FileInfo) error ReadVersion(ctx context.Context, volume, path, versionID string, readData bool) (FileInfo, error) - RenameData(ctx context.Context, srcVolume, srcPath, dataDir, dstVolume, dstPath string) error + RenameData(ctx context.Context, srcVolume, srcPath string, fi FileInfo, dstVolume, dstPath string) error // File operations. ListDir(ctx context.Context, volume, dirPath string, count int) ([]string, error) diff --git a/cmd/storage-rest-client.go b/cmd/storage-rest-client.go index 81c732c0b..b61fb97e2 100644 --- a/cmd/storage-rest-client.go +++ b/cmd/storage-rest-client.go @@ -429,14 +429,19 @@ func (client *storageRESTClient) CheckParts(ctx context.Context, volume string, } // RenameData - rename source path to destination path atomically, metadata and data file. 
-func (client *storageRESTClient) RenameData(ctx context.Context, srcVolume, srcPath, dataDir, dstVolume, dstPath string) (err error) { +func (client *storageRESTClient) RenameData(ctx context.Context, srcVolume, srcPath string, fi FileInfo, dstVolume, dstPath string) (err error) { values := make(url.Values) values.Set(storageRESTSrcVolume, srcVolume) values.Set(storageRESTSrcPath, srcPath) - values.Set(storageRESTDataDir, dataDir) values.Set(storageRESTDstVolume, dstVolume) values.Set(storageRESTDstPath, dstPath) - respBody, err := client.call(ctx, storageRESTMethodRenameData, values, nil, -1) + + var reader bytes.Buffer + if err = msgp.Encode(&reader, &fi); err != nil { + return err + } + + respBody, err := client.call(ctx, storageRESTMethodRenameData, values, &reader, -1) defer http.DrainBody(respBody) return err diff --git a/cmd/storage-rest-common.go b/cmd/storage-rest-common.go index 4e90ef4f2..cd9e25b7b 100644 --- a/cmd/storage-rest-common.go +++ b/cmd/storage-rest-common.go @@ -17,7 +17,7 @@ package cmd const ( - storageRESTVersion = "v30" // Added UpdateMetadata() + storageRESTVersion = "v31" // Added RenameData with fileInfo() storageRESTVersionPrefix = SlashSeparator + storageRESTVersion storageRESTPrefix = minioReservedBucketPath + "/storage" ) @@ -64,7 +64,6 @@ const ( storageRESTTotalVersions = "total-versions" storageRESTSrcVolume = "source-volume" storageRESTSrcPath = "source-path" - storageRESTDataDir = "data-dir" storageRESTDstVolume = "destination-volume" storageRESTDstPath = "destination-path" storageRESTOffset = "offset" diff --git a/cmd/storage-rest-server.go b/cmd/storage-rest-server.go index 8ecb6423b..dddfaffcc 100644 --- a/cmd/storage-rest-server.go +++ b/cmd/storage-rest-server.go @@ -657,13 +657,25 @@ func (s *storageRESTServer) RenameDataHandler(w http.ResponseWriter, r *http.Req if !s.IsValid(w, r) { return } + vars := mux.Vars(r) srcVolume := vars[storageRESTSrcVolume] srcFilePath := vars[storageRESTSrcPath] - dataDir := vars[storageRESTDataDir] dstVolume := vars[storageRESTDstVolume] dstFilePath := vars[storageRESTDstPath] - err := s.storage.RenameData(r.Context(), srcVolume, srcFilePath, dataDir, dstVolume, dstFilePath) + + if r.ContentLength < 0 { + s.writeErrorResponse(w, errInvalidArgument) + return + } + + var fi FileInfo + if err := msgp.Decode(r.Body, &fi); err != nil { + s.writeErrorResponse(w, err) + return + } + + err := s.storage.RenameData(r.Context(), srcVolume, srcFilePath, fi, dstVolume, dstFilePath) if err != nil { s.writeErrorResponse(w, err) } @@ -1051,7 +1063,7 @@ func registerStorageRESTHandlers(router *mux.Router, endpointServerPools Endpoin subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodReadVersion).HandlerFunc(httpTraceHdrs(server.ReadVersionHandler)). Queries(restQueries(storageRESTVolume, storageRESTFilePath, storageRESTVersionID, storageRESTReadData)...) subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodRenameData).HandlerFunc(httpTraceHdrs(server.RenameDataHandler)). - Queries(restQueries(storageRESTSrcVolume, storageRESTSrcPath, storageRESTDataDir, + Queries(restQueries(storageRESTSrcVolume, storageRESTSrcPath, storageRESTDstVolume, storageRESTDstPath)...) subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodCreateFile).HandlerFunc(httpTraceHdrs(server.CreateFileHandler)). Queries(restQueries(storageRESTVolume, storageRESTFilePath, storageRESTLength)...) 
diff --git a/cmd/xl-storage-disk-id-check.go b/cmd/xl-storage-disk-id-check.go index f370b0bb6..c5cd5b8d0 100644 --- a/cmd/xl-storage-disk-id-check.go +++ b/cmd/xl-storage-disk-id-check.go @@ -398,8 +398,8 @@ func (p *xlStorageDiskIDCheck) RenameFile(ctx context.Context, srcVolume, srcPat return p.storage.RenameFile(ctx, srcVolume, srcPath, dstVolume, dstPath) } -func (p *xlStorageDiskIDCheck) RenameData(ctx context.Context, srcVolume, srcPath, dataDir, dstVolume, dstPath string) error { - defer p.updateStorageMetrics(storageMetricRenameData, srcPath, dataDir, dstVolume, dstPath)() +func (p *xlStorageDiskIDCheck) RenameData(ctx context.Context, srcVolume, srcPath string, fi FileInfo, dstVolume, dstPath string) error { + defer p.updateStorageMetrics(storageMetricRenameData, srcPath, fi.DataDir, dstVolume, dstPath)() select { case <-ctx.Done(): @@ -411,7 +411,7 @@ func (p *xlStorageDiskIDCheck) RenameData(ctx context.Context, srcVolume, srcPat return err } - return p.storage.RenameData(ctx, srcVolume, srcPath, dataDir, dstVolume, dstPath) + return p.storage.RenameData(ctx, srcVolume, srcPath, fi, dstVolume, dstPath) } func (p *xlStorageDiskIDCheck) CheckParts(ctx context.Context, volume string, path string, fi FileInfo) (err error) { diff --git a/cmd/xl-storage-format-v2.go b/cmd/xl-storage-format-v2.go index e9bb0339a..6ed51a597 100644 --- a/cmd/xl-storage-format-v2.go +++ b/cmd/xl-storage-format-v2.go @@ -1231,6 +1231,7 @@ func (z xlMetaV2) ToFileInfo(volume, path, versionID string) (fi FileInfo, err e if versionID != "" && versionID != nullVersionID { uv, err = uuid.Parse(versionID) if err != nil { + logger.LogIf(GlobalContext, fmt.Errorf("invalid versionID specified %s", versionID)) return FileInfo{}, errFileVersionNotFound } } diff --git a/cmd/xl-storage.go b/cmd/xl-storage.go index 121ac4c29..5630e9112 100644 --- a/cmd/xl-storage.go +++ b/cmd/xl-storage.go @@ -1503,6 +1503,8 @@ func (s *xlStorage) CreateFile(ctx context.Context, volume, path string, fileSiz defer func() { if err != nil { if volume == minioMetaTmpBucket { + // only cleanup parent path if the + // parent volume name is minioMetaTmpBucket removeAll(parentFilePath) } } @@ -1522,7 +1524,9 @@ func (s *xlStorage) CreateFile(ctx context.Context, volume, path string, fileSiz return osErrToFileErr(err) } - if written > fileSize { + if written < fileSize { + return errLessData + } else if written > fileSize { return errMoreData } @@ -1818,7 +1822,7 @@ func (s *xlStorage) Delete(ctx context.Context, volume string, path string, recu } // RenameData - rename source path to destination path atomically, metadata and data directory. 
-func (s *xlStorage) RenameData(ctx context.Context, srcVolume, srcPath, dataDir, dstVolume, dstPath string) (err error) { +func (s *xlStorage) RenameData(ctx context.Context, srcVolume, srcPath string, fi FileInfo, dstVolume, dstPath string) (err error) { defer func() { if err == nil { if s.globalSync { @@ -1861,6 +1865,7 @@ func (s *xlStorage) RenameData(ctx context.Context, srcVolume, srcPath, dataDir, var srcDataPath string var dstDataPath string + dataDir := retainSlash(fi.DataDir) if dataDir != "" { srcDataPath = retainSlash(pathJoin(srcVolumeDir, srcPath, dataDir)) // make sure to always use path.Join here, do not use pathJoin as @@ -1877,17 +1882,6 @@ func (s *xlStorage) RenameData(ctx context.Context, srcVolume, srcPath, dataDir, return err } - srcBuf, err := xioutil.ReadFile(srcFilePath) - if err != nil { - return osErrToFileErr(err) - } - - fi, err := getFileInfo(srcBuf, dstVolume, dstPath, "", true) - if err != nil { - logger.LogIf(ctx, err) - return err - } - dstBuf, err := xioutil.ReadFile(dstFilePath) if err != nil { if !osIsNotExist(err) { @@ -2058,11 +2052,13 @@ func (s *xlStorage) RenameData(ctx context.Context, srcVolume, srcPath, dataDir, // Commit meta-file if err = renameAll(srcFilePath, dstFilePath); err != nil { + logger.LogIf(ctx, err) return osErrToFileErr(err) } } else { // Write meta-file directly, no data if err = s.WriteAll(ctx, dstVolume, pathJoin(dstPath, xlStorageFormatFile), dstBuf); err != nil { + logger.LogIf(ctx, err) return err } }
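
Appended reviewer note, not part of the commit: the sketch below illustrates the call shape this patch introduces, where callers hand a per-disk FileInfo straight to RenameData() instead of first writing a unique xl.meta via writeUniqueFileInfo() and then renaming by dataDir. Everything in it is a simplified stand-in: FileInfo and ErasureInfo are pared down to the few fields the sketch touches, xlStorage is a toy implementation, renameData loops sequentially rather than using an errgroup with write-quorum error reduction, and encoding/gob stands in for the msgp encoding that the real storage REST client and server use.

package main

import (
	"bytes"
	"context"
	"encoding/gob" // stand-in for msgp, keeps the sketch self-contained
	"fmt"
)

// ErasureInfo and FileInfo are pared-down stand-ins for the real cmd.FileInfo.
type ErasureInfo struct {
	Index int // 1-based position of this disk in the erasure set
}

type FileInfo struct {
	Volume  string
	Name    string
	DataDir string
	Erasure ErasureInfo
}

// StorageAPI mirrors the changed interface method: RenameData now receives the
// whole FileInfo, so a disk can persist xl.meta and move the data directory in
// a single call instead of two.
type StorageAPI interface {
	RenameData(ctx context.Context, srcVolume, srcPath string, fi FileInfo, dstVolume, dstPath string) error
}

// xlStorage is a toy disk that only records what it was asked to do.
type xlStorage struct{}

func (s *xlStorage) RenameData(ctx context.Context, srcVolume, srcPath string, fi FileInfo, dstVolume, dstPath string) error {
	fmt.Printf("rename %s/%s (dataDir=%s, index=%d) -> %s/%s\n",
		srcVolume, srcPath, fi.DataDir, fi.Erasure.Index, dstVolume, dstPath)
	return nil
}

// renameData shows the caller-side pattern after the patch: one FileInfo per
// disk, Erasure.Index filled in when still unset, passed directly to the disk.
func renameData(ctx context.Context, disks []StorageAPI, srcVolume, srcPath string, metadata []FileInfo, dstVolume, dstPath string) error {
	for i, disk := range disks {
		fi := metadata[i]
		if fi.Erasure.Index == 0 {
			fi.Erasure.Index = i + 1
		}
		if err := disk.RenameData(ctx, srcVolume, srcPath, fi, dstVolume, dstPath); err != nil {
			return err
		}
	}
	return nil
}

// encodeFileInfo mimics what the storage REST client now does: serialize the
// FileInfo into the request body (the real code uses msgp.Encode).
func encodeFileInfo(fi FileInfo) (*bytes.Buffer, error) {
	var buf bytes.Buffer
	if err := gob.NewEncoder(&buf).Encode(fi); err != nil {
		return nil, err
	}
	return &buf, nil
}

func main() {
	disks := []StorageAPI{&xlStorage{}, &xlStorage{}}
	metadata := []FileInfo{
		{Volume: "bucket", Name: "object", DataDir: "data-dir-uuid"},
		{Volume: "bucket", Name: "object", DataDir: "data-dir-uuid"},
	}
	if err := renameData(context.Background(), disks, ".minio.sys/tmp", "tmp-uuid", metadata, "bucket", "object"); err != nil {
		fmt.Println("rename failed:", err)
	}
	if buf, err := encodeFileInfo(metadata[0]); err == nil {
		fmt.Println("FileInfo payload bytes:", buf.Len())
	}
}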