support decommissioning of tiered objects (#16751)

Poorna 2023-03-16 07:48:05 -07:00 committed by GitHub
parent a65df1e67b
commit d1e775313d
6 changed files with 155 additions and 23 deletions


@@ -2089,3 +2089,46 @@ func (er erasureObjects) restoreTransitionedObject(ctx context.Context, bucket s
})
return setRestoreHeaderFn(oi, err)
}
// DecomTieredObject - moves tiered object to another pool during decommissioning.
func (er erasureObjects) DecomTieredObject(ctx context.Context, bucket, object string, fi FileInfo, opts ObjectOptions) error {
if opts.UserDefined == nil {
opts.UserDefined = make(map[string]string)
}
// overlay Erasure info for this set of disks
storageDisks := er.getDisks()
// Get parity and data drive count based on storage class metadata
parityDrives := globalStorageClass.GetParityForSC(opts.UserDefined[xhttp.AmzStorageClass])
if parityDrives < 0 {
parityDrives = er.defaultParityCount
}
dataDrives := len(storageDisks) - parityDrives
// We now know the number of data and parity blocks this object needs.
// writeQuorum equals dataDrives, bumped by one when data and parity counts are equal.
writeQuorum := dataDrives
if dataDrives == parityDrives {
writeQuorum++
}
// Initialize parts metadata
partsMetadata := make([]FileInfo, len(storageDisks))
fi2 := newFileInfo(pathJoin(bucket, object), dataDrives, parityDrives)
fi.Erasure = fi2.Erasure
// Initialize erasure metadata.
for index := range partsMetadata {
partsMetadata[index] = fi
partsMetadata[index].Erasure.Index = index + 1
}
// Order disks according to erasure distribution
var onlineDisks []StorageAPI
onlineDisks, partsMetadata = shuffleDisksAndPartsMetadata(storageDisks, partsMetadata, fi)
if _, err := writeUniqueFileInfo(ctx, onlineDisks, bucket, object, partsMetadata, writeQuorum); err != nil {
return toObjectErr(err, bucket, object)
}
return nil
}
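
A note on the quorum arithmetic above: parity is taken from the storage-class metadata (falling back to the set's default parity), data drives are whatever remains, and the write quorum equals the data drive count, bumped by one when data and parity counts are equal. The standalone Go sketch below reproduces just that derivation with hypothetical drive counts; it is illustrative only and does not touch the globalStorageClass lookup or any MinIO types.

package main

import "fmt"

// writeQuorumFor mirrors the derivation in DecomTieredObject above: parity
// comes from storage-class metadata (or the default when absent), data is
// the remainder, and writeQuorum is dataDrives, +1 when data == parity.
func writeQuorumFor(totalDisks, parityFromSC, defaultParity int) (data, parity, quorum int) {
	parity = parityFromSC
	if parity < 0 { // no storage-class override, use the set default
		parity = defaultParity
	}
	data = totalDisks - parity
	quorum = data
	if data == parity {
		quorum++
	}
	return data, parity, quorum
}

func main() {
	// Hypothetical 16-drive erasure set with default parity of 4 (EC:4).
	d, p, q := writeQuorumFor(16, -1, 4)
	fmt.Printf("data=%d parity=%d writeQuorum=%d\n", d, p, q) // data=12 parity=4 writeQuorum=12

	// With EC:8 data and parity are equal, so the quorum is bumped to 9.
	d, p, q = writeQuorumFor(16, 8, 4)
	fmt.Printf("data=%d parity=%d writeQuorum=%d\n", d, p, q) // data=8 parity=8 writeQuorum=9
}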


@@ -736,14 +736,16 @@ func (z *erasureServerPools) decommissionPool(ctx context.Context, idx int, pool
objInfo := fi.ToObjectInfo(bucket, object, versioned)
evt := evalActionFromLifecycle(ctx, *lc, lr, objInfo)
- if evt.Action.Delete() {
+ switch {
+ case evt.Action.DeleteRestored(): // if restored copy has expired, delete it synchronously
+ applyExpiryOnTransitionedObject(ctx, z, objInfo, evt.Action.DeleteRestored())
+ return false
+ case evt.Action.Delete():
globalExpiryState.enqueueByDays(objInfo, evt.Action.DeleteRestored(), evt.Action.DeleteVersioned())
- if !evt.Action.DeleteRestored() {
- return true
- }
return true
+ default:
+ return false
}
- return false
}
decommissionEntry := func(entry metaCacheEntry) {
@@ -767,12 +769,6 @@ func (z *erasureServerPools) decommissionPool(ctx context.Context, idx int, pool
var decommissionedCount int
for _, version := range fivs.Versions {
// TODO: Skip transitioned objects for now.
if version.IsRemote() {
logger.LogIf(ctx, fmt.Errorf("found %s/%s transitioned object, transitioned object won't be decommissioned", bi.Name, version.Name))
continue
}
// Apply lifecycle rules on the objects that are expired.
if filterLifecycle(bi.Name, version.Name, version) {
logger.LogIf(ctx, fmt.Errorf("found %s/%s (%s) expired object based on ILM rules, skipping and scheduled for deletion", bi.Name, version.Name, version.VersionID))
@@ -821,6 +817,22 @@ func (z *erasureServerPools) decommissionPool(ctx context.Context, idx int, pool
var failure, ignore bool
// gr.Close() is ensured by decommissionObject().
for try := 0; try < 3; try++ {
if version.IsRemote() {
stopFn := globalDecommissionMetrics.log(decomMetricDecommissionObject, idx, bi.Name, version.Name, version.VersionID)
if err := z.DecomTieredObject(ctx, bi.Name, version.Name, version, ObjectOptions{
VersionID: version.VersionID,
MTime: version.ModTime,
UserDefined: version.Metadata,
}); err != nil {
stopFn(err)
failure = true
logger.LogIf(ctx, err)
continue
}
stopFn(nil)
failure = false
break
}
gr, err := set.GetObjectNInfo(ctx,
bi.Name,
encodeDirObject(version.Name),
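
For versions that report IsRemote(), the branch added above moves only the object's metadata via DecomTieredObject; the data already lives in the warm tier, so there is no GetObjectNInfo read-back. The surrounding bounded-retry shape (for try := 0; try < 3; try++) is sketched below in isolation; the attempt callback is a hypothetical stand-in, not MinIO code.

package main

import (
	"errors"
	"fmt"
)

// retryUpTo mirrors the "for try := 0; try < 3; try++" loop above:
// stop at the first success, otherwise surface the last failure.
func retryUpTo(attempts int, attempt func(try int) error) error {
	var lastErr error
	for try := 0; try < attempts; try++ {
		if err := attempt(try); err != nil {
			lastErr = err
			continue
		}
		return nil
	}
	return lastErr
}

func main() {
	err := retryUpTo(3, func(try int) error {
		if try < 2 {
			return errors.New("transient write failure") // e.g. a drive timing out
		}
		return nil // metadata written with quorum on the final attempt
	})
	fmt.Println("result:", err) // result: <nil>
}
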
@@ -1268,17 +1280,6 @@ func (z *erasureServerPools) StartDecommission(ctx context.Context, indices ...i
z.HealBucket(ctx, bucket.Name, madmin.HealOpts{})
}
// TODO: Support decommissioning transition tiers.
for _, bucket := range decomBuckets {
if lc, err := globalLifecycleSys.Get(bucket.Name); err == nil {
if lc.HasTransition() {
return decomError{
Err: fmt.Sprintf("Bucket is part of transitioned tier %s: decommission is not allowed in Tier'd setups", bucket.Name),
}
}
}
}
// Create .minio.sys/config, .minio.sys/buckets paths if missing,
// this code is present to avoid any missing meta buckets on other
// pools.


@@ -2348,3 +2348,27 @@ func (z *erasureServerPools) CheckAbandonedParts(ctx context.Context, bucket, ob
}
return nil
}
// DecomTieredObject - moves tiered object to another pool during decommissioning.
func (z *erasureServerPools) DecomTieredObject(ctx context.Context, bucket, object string, fi FileInfo, opts ObjectOptions) error {
object = encodeDirObject(object)
if z.SinglePool() {
return fmt.Errorf("error decommissioning %s/%s", bucket, object)
}
if !opts.NoLock {
ns := z.NewNSLock(bucket, object)
lkctx, err := ns.GetLock(ctx, globalOperationTimeout)
if err != nil {
return err
}
ctx = lkctx.Context()
defer ns.Unlock(lkctx)
opts.NoLock = true
}
idx, err := z.getPoolIdxNoLock(ctx, bucket, object, fi.Size)
if err != nil {
return err
}
return z.serverPools[idx].DecomTieredObject(ctx, bucket, object, fi, opts)
}
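
The pool-level wrapper above acquires the namespace lock once and then sets opts.NoLock before delegating, so lower layers never try to lock the same name again. The sketch below shows that lock-once/NoLock hand-off in isolation; the Options and nsLocker types are invented for illustration and are not MinIO's API.

package main

import (
	"context"
	"fmt"
	"sync"
)

// Options stands in for the NoLock flag threaded through ObjectOptions.
type Options struct{ NoLock bool }

// nsLocker is a toy namespace lock, purely illustrative.
type nsLocker struct{ mu sync.Mutex }

// topLevel locks exactly once, then marks the options so inner layers
// know the caller already holds the lock (mirroring DecomTieredObject).
func topLevel(ctx context.Context, lk *nsLocker, opts Options) error {
	if !opts.NoLock {
		lk.mu.Lock()
		defer lk.mu.Unlock()
		opts.NoLock = true
	}
	return innerLayer(ctx, opts)
}

// innerLayer would deadlock if it re-locked blindly, so it expects NoLock.
func innerLayer(_ context.Context, opts Options) error {
	if !opts.NoLock {
		return fmt.Errorf("inner layer asked to lock again")
	}
	return nil // perform the metadata move here
}

func main() {
	var lk nsLocker
	fmt.Println(topLevel(context.Background(), &lk, Options{})) // <nil>
}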


@@ -1215,6 +1215,12 @@ func (s *erasureSets) PutObjectMetadata(ctx context.Context, bucket, object stri
return er.PutObjectMetadata(ctx, bucket, object, opts)
}
// DecomTieredObject - moves tiered object to another pool during decommissioning.
func (s *erasureSets) DecomTieredObject(ctx context.Context, bucket, object string, fi FileInfo, opts ObjectOptions) error {
er := s.getHashedSet(object)
return er.DecomTieredObject(ctx, bucket, object, fi, opts)
}
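
At the set layer the object is routed with getHashedSet, so the metadata write lands on the same erasure set that already owns that object name. As an illustration only, the sketch below hashes the name modulo the set count with FNV-1a; MinIO's real selection differs (it is SipHash-based and keyed on the deployment ID), but the routing idea is the same.

package main

import (
	"fmt"
	"hash/fnv"
)

// hashedSetIndex picks an erasure set for an object name: hash the name,
// take it modulo the number of sets. FNV-1a is used here purely for
// illustration; it is not what erasure-sets.go actually does.
func hashedSetIndex(object string, setCount int) int {
	h := fnv.New64a()
	h.Write([]byte(object))
	return int(h.Sum64() % uint64(setCount))
}

func main() {
	// The same name always maps to the same set, so a write via
	// DecomTieredObject lands where later reads of that object will look.
	for _, obj := range []string{"prefix5/a.txt", "prefix5/b.txt", "prefix5/a.txt"} {
		fmt.Printf("%-16s -> set %d\n", obj, hashedSetIndex(obj, 4))
	}
}
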
// PutObjectTags - replace or add tags to an existing object
func (s *erasureSets) PutObjectTags(ctx context.Context, bucket, object string, tags string, opts ObjectOptions) (ObjectInfo, error) {
er := s.getHashedSet(object)


@@ -254,6 +254,7 @@ type ObjectLayer interface {
// Metadata operations
PutObjectMetadata(context.Context, string, string, ObjectOptions) (ObjectInfo, error)
DecomTieredObject(context.Context, string, string, FileInfo, ObjectOptions) error
// ObjectTagging operations
PutObjectTags(context.Context, string, string, string, ObjectOptions) (ObjectInfo, error)


@@ -6,6 +6,7 @@ fi
pkill minio
rm -rf /tmp/xl
rm -rf /tmp/xltier
if [ ! -f ./mc ]; then
wget --quiet -O mc https://dl.minio.io/client/mc/release/linux-amd64/mc && \
@@ -45,6 +46,23 @@ expected_checksum=$(./mc cat internal/dsync/drwmutex.go | md5sum)
user_count=$(./mc admin user list myminio/ | wc -l)
policy_count=$(./mc admin policy list myminio/ | wc -l)
## create a warm tier instance
(minio server /tmp/xltier/{1...4}/disk{0...1} --address :9001 2>&1 >/dev/null)&
sleep 2
export MC_HOST_mytier="http://minioadmin:minioadmin@localhost:9001/"
./mc mb -l myminio/bucket2
./mc mb -l mytier/tiered
## create a tier and set up ilm policy to tier immediately
./mc admin tier add minio myminio TIER1 --endpoint http://localhost:9001 --access-key minioadmin --secret-key minioadmin --bucket tiered --prefix prefix5/
./mc ilm add myminio/bucket2 --transition-days 0 --transition-tier TIER1
## mirror some content to bucket2 and capture the versions that get tiered
./mc mirror internal myminio/bucket2/ --quiet >/dev/null
./mc ls -r myminio/bucket2/ > bucket2_ns.txt
./mc ls -r --versions myminio/bucket2/ > bucket2_ns_versions.txt
sleep 2
./mc ls -r --versions mytier/tiered/ > tiered_ns_versions.txt
kill $pid
(minio server /tmp/xl/{1...10}/disk{0...1} /tmp/xl/{11...30}/disk{0...3} 2>&1 >/tmp/expanded.log) &
pid=$!
@@ -134,4 +152,43 @@ if [ "${expected_checksum}" != "${got_checksum}" ]; then
exit 1
fi
# after decommissioning, compare listings in bucket2 and tiered
./mc version info myminio/bucket2 | grep -q "versioning is enabled"
ret=$?
if [ $ret -ne 0 ]; then
echo "BUG: expected versioning enabled after decommission on bucket2"
exit 1
fi
./mc ls -r myminio/bucket2 > decommissioned_bucket2_ns.txt
./mc ls -r --versions myminio/bucket2 > decommissioned_bucket2_ns_versions.txt
./mc ls -r --versions mytier/tiered/ > tiered_ns_versions2.txt
out=$(diff -qpruN bucket2_ns.txt decommissioned_bucket2_ns.txt)
ret=$?
if [ $ret -ne 0 ]; then
echo "BUG: expected no missing entries after decommission in bucket2: $out"
exit 1
fi
out=$(diff -qpruN bucket2_ns_versions.txt decommissioned_bucket2_ns_versions.txt)
ret=$?
if [ $ret -ne 0 ]; then
echo "BUG: expected no missing entries after decommission in bucket2x: $out"
exit 1
fi
out=$(diff -qpruN tiered_ns_versions.txt tiered_ns_versions2.txt)
ret=$?
if [ $ret -ne 0 ]; then
echo "BUG: expected no missing entries after decommission in warm tier: $out"
exit 1
fi
got_checksum=$(./mc cat myminio/bucket2/dsync/drwmutex.go | md5sum)
if [ "${expected_checksum}" != "${got_checksum}" ]; then
echo "BUG: decommission failed on encrypted objects with tiering: expected ${expected_checksum} got ${got_checksum}"
exit 1
fi
kill $pid