capture total data availability errors and any timeout errors (#17748)

This commit is contained in:
Harshavardhana 2023-07-29 23:26:26 -07:00 committed by GitHub
parent 48a3e9bc82
commit 5e5bdf5432
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 86 additions and 10 deletions

View file

@ -61,8 +61,10 @@ type DiskInfo struct {
// the number of calls of each API and the moving average of
// the duration of each API.
type DiskMetrics struct {
LastMinute map[string]AccElem `json:"apiLatencies,omitempty"`
APICalls map[string]uint64 `json:"apiCalls,omitempty"`
// LastMinute maps an API to its AccElem latency entry; serialized as
// "apiLatencies" and omitted from JSON when empty.
// NOTE(review): presumably keyed by the same API names as APICalls — confirm.
LastMinute map[string]AccElem `json:"apiLatencies,omitempty"`
// APICalls maps an API name to the number of calls made to it; serialized
// as "apiCalls" and omitted from JSON when empty.
APICalls map[string]uint64 `json:"apiCalls,omitempty"`
// TotalErrorsAvailability is the running count of storage calls that
// finished with a data availability error: access denied (volume, file
// or disk), faulty local or remote disk, or a context deadline/cancel
// error. It has no omitempty, so it is always present in the JSON output.
TotalErrorsAvailability uint64 `json:"totalErrsAvailability"`
// TotalErrorsTimeout is the subset of TotalErrorsAvailability whose error
// was context.DeadlineExceeded or context.Canceled. It has no omitempty,
// so it is always present in the JSON output.
TotalErrorsTimeout uint64 `json:"totalErrsTimeout"`
}
// VolsInfo is a collection of volume(bucket) information

View file

@ -399,6 +399,18 @@ func (z *DiskMetrics) DecodeMsg(dc *msgp.Reader) (err error) {
}
z.APICalls[za0003] = za0004
}
case "TotalErrorsAvailability":
z.TotalErrorsAvailability, err = dc.ReadUint64()
if err != nil {
err = msgp.WrapError(err, "TotalErrorsAvailability")
return
}
case "TotalErrorsTimeout":
z.TotalErrorsTimeout, err = dc.ReadUint64()
if err != nil {
err = msgp.WrapError(err, "TotalErrorsTimeout")
return
}
default:
err = dc.Skip()
if err != nil {
@ -412,9 +424,9 @@ func (z *DiskMetrics) DecodeMsg(dc *msgp.Reader) (err error) {
// EncodeMsg implements msgp.Encodable
func (z *DiskMetrics) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 2
// map header, size 4
// write "LastMinute"
err = en.Append(0x82, 0xaa, 0x4c, 0x61, 0x73, 0x74, 0x4d, 0x69, 0x6e, 0x75, 0x74, 0x65)
err = en.Append(0x84, 0xaa, 0x4c, 0x61, 0x73, 0x74, 0x4d, 0x69, 0x6e, 0x75, 0x74, 0x65)
if err != nil {
return
}
@ -457,15 +469,35 @@ func (z *DiskMetrics) EncodeMsg(en *msgp.Writer) (err error) {
return
}
}
// write "TotalErrorsAvailability"
err = en.Append(0xb7, 0x54, 0x6f, 0x74, 0x61, 0x6c, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x73, 0x41, 0x76, 0x61, 0x69, 0x6c, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x79)
if err != nil {
return
}
err = en.WriteUint64(z.TotalErrorsAvailability)
if err != nil {
err = msgp.WrapError(err, "TotalErrorsAvailability")
return
}
// write "TotalErrorsTimeout"
err = en.Append(0xb2, 0x54, 0x6f, 0x74, 0x61, 0x6c, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x73, 0x54, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74)
if err != nil {
return
}
err = en.WriteUint64(z.TotalErrorsTimeout)
if err != nil {
err = msgp.WrapError(err, "TotalErrorsTimeout")
return
}
return
}
// MarshalMsg implements msgp.Marshaler
func (z *DiskMetrics) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
// map header, size 2
// map header, size 4
// string "LastMinute"
o = append(o, 0x82, 0xaa, 0x4c, 0x61, 0x73, 0x74, 0x4d, 0x69, 0x6e, 0x75, 0x74, 0x65)
o = append(o, 0x84, 0xaa, 0x4c, 0x61, 0x73, 0x74, 0x4d, 0x69, 0x6e, 0x75, 0x74, 0x65)
o = msgp.AppendMapHeader(o, uint32(len(z.LastMinute)))
for za0001, za0002 := range z.LastMinute {
o = msgp.AppendString(o, za0001)
@ -482,6 +514,12 @@ func (z *DiskMetrics) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.AppendString(o, za0003)
o = msgp.AppendUint64(o, za0004)
}
// string "TotalErrorsAvailability"
o = append(o, 0xb7, 0x54, 0x6f, 0x74, 0x61, 0x6c, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x73, 0x41, 0x76, 0x61, 0x69, 0x6c, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x79)
o = msgp.AppendUint64(o, z.TotalErrorsAvailability)
// string "TotalErrorsTimeout"
o = append(o, 0xb2, 0x54, 0x6f, 0x74, 0x61, 0x6c, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x73, 0x54, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74)
o = msgp.AppendUint64(o, z.TotalErrorsTimeout)
return
}
@ -563,6 +601,18 @@ func (z *DiskMetrics) UnmarshalMsg(bts []byte) (o []byte, err error) {
}
z.APICalls[za0003] = za0004
}
case "TotalErrorsAvailability":
z.TotalErrorsAvailability, bts, err = msgp.ReadUint64Bytes(bts)
if err != nil {
err = msgp.WrapError(err, "TotalErrorsAvailability")
return
}
case "TotalErrorsTimeout":
z.TotalErrorsTimeout, bts, err = msgp.ReadUint64Bytes(bts)
if err != nil {
err = msgp.WrapError(err, "TotalErrorsTimeout")
return
}
default:
bts, err = msgp.Skip(bts)
if err != nil {
@ -591,6 +641,7 @@ func (z *DiskMetrics) Msgsize() (s int) {
s += msgp.StringPrefixSize + len(za0003) + msgp.Uint64Size
}
}
s += 24 + msgp.Uint64Size + 19 + msgp.Uint64Size
return
}

View file

@ -77,6 +77,8 @@ const (
// Detects change in underlying disk.
type xlStorageDiskIDCheck struct {
totalErrsAvailability uint64 // Captures all data availability errors such as permission denied, faulty disk and timeout errors.
totalErrsTimeout uint64 // Captures all timeout only errors
// The 64-bit counters accessed atomically (the two totals above and apiCalls) must stay at the start of the struct so alignment is guaranteed for atomic operations.
apiCalls [storageMetricLast]uint64
apiLatencies [storageMetricLast]*lockedLastMinuteLatency
@ -102,6 +104,8 @@ func (p *xlStorageDiskIDCheck) getMetrics() DiskMetrics {
for i := range p.apiCalls {
diskMetric.APICalls[storageMetric(i).String()] = atomic.LoadUint64(&p.apiCalls[i])
}
diskMetric.TotalErrorsAvailability = atomic.LoadUint64(&p.totalErrsAvailability)
diskMetric.TotalErrorsTimeout = atomic.LoadUint64(&p.totalErrsTimeout)
return diskMetric, nil
}
})
@ -661,15 +665,34 @@ func (p *xlStorageDiskIDCheck) updateStorageMetrics(s storageMetric, paths ...st
return func(errp *error) {
duration := time.Since(startTime)
var err error
if errp != nil && *errp != nil {
err = *errp
}
atomic.AddUint64(&p.apiCalls[s], 1)
if IsErr(err, []error{
errVolumeAccessDenied,
errFileAccessDenied,
errDiskAccessDenied,
errFaultyDisk,
errFaultyRemoteDisk,
context.DeadlineExceeded,
context.Canceled,
}...) {
atomic.AddUint64(&p.totalErrsAvailability, 1)
if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) {
atomic.AddUint64(&p.totalErrsTimeout, 1)
}
}
p.apiLatencies[s].add(duration)
if trace {
var errStr string
if errp != nil && *errp != nil {
errStr = (*errp).Error()
}
paths = append([]string{p.String()}, paths...)
var errStr string
if err != nil {
errStr = err.Error()
}
globalTrace.Publish(storageTrace(s, startTime, duration, strings.Join(paths, " "), errStr))
}
}