Add uptime to ServiceStatus (#3690)

This commit is contained in:
Krishnan Parthasarathi 2017-02-08 13:43:02 +05:30 committed by Harshavardhana
parent 7547f3c8a3
commit ce9aa2f2b2
7 changed files with 139 additions and 10 deletions

View file

@ -19,7 +19,6 @@ package cmd
import (
"encoding/json"
"encoding/xml"
"fmt"
"io/ioutil"
"net/http"
"net/url"
@ -54,8 +53,8 @@ type ServerVersion struct {
// ServerStatus - payload returned by the service status API. It
// bundles the storage backend information, the server version and
// the uptime into a single JSON document.
type ServerStatus struct {
	// Storage backend information; serialized under "storageInfo".
	StorageInfo StorageInfo `json:"storageInfo"`

	// Server version details; serialized under "serverVersion".
	ServerVersion ServerVersion `json:"serverVersion"`

	// Uptime as a time.Duration; serialized under "uptime".
	Uptime time.Duration `json:"uptime"`
}
// ServiceStatusHandler - GET /?service
@ -70,15 +69,22 @@ func (adminAPI adminAPIHandlers) ServiceStatusHandler(w http.ResponseWriter, r *
return
}
// Fetch storage backend information
storageInfo := newObjectLayerFn().StorageInfo()
// Fetch server version
serverVersion := ServerVersion{Version: Version, CommitID: CommitID}
// Fetch uptimes from all peers. This may fail due to lack
// of read-quorum availability.
uptime, err := getPeerUptimes(globalAdminPeers)
if err != nil {
writeErrorResponse(w, toAPIErrorCode(err), r.URL)
errorIf(err, "Possibly failed to get uptime from majority of servers.")
return
}
// Create API response
serverStatus := ServerStatus{
StorageInfo: storageInfo,
ServerVersion: serverVersion,
Uptime: uptime,
}
// Marshal API response
@ -542,7 +548,6 @@ func (adminAPI adminAPIHandlers) HealFormatHandler(w http.ResponseWriter, r *htt
// Create a new set of storage instances to heal format.json.
bootstrapDisks, err := initStorageDisks(globalEndpoints)
if err != nil {
fmt.Println(traceError(err))
writeErrorResponse(w, toAPIErrorCode(err), r.URL)
return
}
@ -550,7 +555,6 @@ func (adminAPI adminAPIHandlers) HealFormatHandler(w http.ResponseWriter, r *htt
// Heal format.json on available storage.
err = healFormatXL(bootstrapDisks)
if err != nil {
fmt.Println(traceError(err))
writeErrorResponse(w, toAPIErrorCode(err), r.URL)
return
}
@ -558,7 +562,6 @@ func (adminAPI adminAPIHandlers) HealFormatHandler(w http.ResponseWriter, r *htt
// Instantiate new object layer with newly formatted storage.
newObjectAPI, err := newXLObjects(bootstrapDisks)
if err != nil {
fmt.Println(traceError(err))
writeErrorResponse(w, toAPIErrorCode(err), r.URL)
return
}

View file

@ -25,6 +25,7 @@ import (
"net/http/httptest"
"net/url"
"testing"
"time"
router "github.com/gorilla/mux"
)
@ -55,6 +56,9 @@ func prepareAdminXLTestBed() (*adminXLTestBed, error) {
return nil, xlErr
}
// Initialize boot time
globalBootTime = time.Now().UTC()
// Set globalEndpoints for a single node XL setup.
for _, xlDir := range xlDirs {
globalEndpoints = append(globalEndpoints, &url.URL{
@ -225,14 +229,13 @@ func testServicesCmdHandler(cmd cmdType, t *testing.T) {
if cmd == statusCmd {
expectedInfo := ServerStatus{
StorageInfo: newObjectLayerFn().StorageInfo(),
ServerVersion: ServerVersion{Version: Version, CommitID: CommitID},
}
receivedInfo := ServerStatus{}
if jsonErr := json.Unmarshal(rec.Body.Bytes(), &receivedInfo); jsonErr != nil {
t.Errorf("Failed to unmarshal StorageInfo - %v", jsonErr)
}
if expectedInfo != receivedInfo {
if expectedInfo.ServerVersion != receivedInfo.ServerVersion {
t.Errorf("Expected storage info and received storage info differ, %v %v", expectedInfo, receivedInfo)
}
}

View file

@ -19,6 +19,7 @@ package cmd
import (
"net/url"
"path"
"sort"
"sync"
"time"
)
@ -39,6 +40,7 @@ type adminCmdRunner interface {
Restart() error
ListLocks(bucket, prefix string, duration time.Duration) ([]VolumeLockInfo, error)
ReInitDisks() error
Uptime() (time.Duration, error)
}
// Restart - Sends a message over channel to the go-routine
@ -88,6 +90,28 @@ func (rc remoteAdminClient) ReInitDisks() error {
return rc.Call("Admin.ReInitDisks", &args, &reply)
}
// Uptime - reports how long this server has been up, measured from
// globalBootTime (the timestamp taken after the object layer is
// initialized). Returns errServerNotInitialized while globalBootTime
// is still unset.
func (lc localAdminClient) Uptime() (time.Duration, error) {
	var elapsed time.Duration

	// A zero boot time means the start-up timestamp was never recorded.
	if globalBootTime.IsZero() {
		return elapsed, errServerNotInitialized
	}

	elapsed = time.Now().UTC().Sub(globalBootTime)
	return elapsed, nil
}
// Uptime - asks the remote server for its uptime through the
// "Admin.Uptime" RPC method. On RPC failure the error is returned
// together with a zero duration.
func (rc remoteAdminClient) Uptime() (time.Duration, error) {
	var (
		request  AuthRPCArgs
		response UptimeReply
	)

	if err := rc.Call("Admin.Uptime", &request, &response); err != nil {
		return 0, err
	}

	return response.Uptime, nil
}
// adminPeer - represents an entity that implements Restart methods.
type adminPeer struct {
addr string
@ -241,3 +265,65 @@ func reInitPeerDisks(peers adminPeers) error {
wg.Wait()
return nil
}
// uptimeSlice - collection of per-peer uptime results, each paired
// with the error (if any) from fetching it. It implements
// sort.Interface, ordering entries by ascending uptime.
type uptimeSlice []struct {
	err    error
	uptime time.Duration
}

// Len - number of entries in the slice.
func (us uptimeSlice) Len() int { return len(us) }

// Less - reports whether entry i has a strictly shorter uptime than
// entry j. The err field plays no part in the ordering.
func (us uptimeSlice) Less(i, j int) bool {
	left, right := us[i].uptime, us[j].uptime
	return left < right
}

// Swap - exchanges entries i and j in place.
func (us uptimeSlice) Swap(i, j int) {
	held := us[i]
	us[i] = us[j]
	us[j] = held
}
// getPeerUptimes - fetches the uptime of every peer concurrently and
// returns the uptime used to represent the time since read quorum
// was (re-)established. Peers whose Uptime call failed are ignored.
// Returns InsufficientReadQuorum if no uptime could be selected, i.e.
// too few peers answered successfully (this includes an empty peer
// list).
func getPeerUptimes(peers adminPeers) (time.Duration, error) {
	uptimes := make(uptimeSlice, len(peers))

	// Get up time of all servers. Every goroutine writes only to its
	// own index of uptimes, so the results need no extra locking.
	var wg sync.WaitGroup
	for i, peer := range peers {
		wg.Add(1)
		go func(idx int, peer adminPeer) {
			defer wg.Done()
			uptimes[idx].uptime, uptimes[idx].err = peer.cmdRunner.Uptime()
		}(i, peer)
	}
	wg.Wait()

	// Sort uptimes in ascending order (shortest uptime first).
	sort.Sort(uptimes)

	// Pick the readQuorum'th successfully fetched uptime in sorted
	// order.
	// NOTE(review): with an ascending sort this is the readQuorum'th
	// smallest valid uptime — confirm this is the intended entry.
	readQuorum := len(uptimes) / 2
	validCount := 0
	for _, entry := range uptimes {
		if entry.err != nil {
			continue
		}

		validCount++
		if validCount >= readQuorum {
			// Return straight from the loop so that a legitimate
			// uptime of zero is not mistaken for "nothing selected".
			return entry.uptime, nil
		}
	}

	// No uptime was selected: there weren't read quorum number of
	// servers up.
	return time.Duration(0), InsufficientReadQuorum{}
}

View file

@ -48,6 +48,12 @@ type ListLocksReply struct {
volLocks []VolumeLockInfo
}
// UptimeReply - reply payload of the uptime RPC. It carries the
// reported uptime alongside the embedded AuthRPCReply.
type UptimeReply struct {
	AuthRPCReply

	// Uptime reported by the server that handled the call.
	Uptime time.Duration
}
// Restart - Restart this instance of minio server.
func (s *adminCmd) Restart(args *AuthRPCArgs, reply *AuthRPCReply) error {
if err := args.IsAuthenticated(); err != nil {
@ -105,6 +111,27 @@ func (s *adminCmd) ReInitDisks(args *AuthRPCArgs, reply *AuthRPCReply) error {
return nil
}
// Uptime - RPC handler that reports how long this server has been
// up, measured from globalBootTime. It fails with the authentication
// error when args is not authenticated, and with
// errServerNotInitialized while globalBootTime is still unset.
func (s *adminCmd) Uptime(args *AuthRPCArgs, reply *UptimeReply) error {
	authErr := args.IsAuthenticated()
	if authErr != nil {
		return authErr
	}

	if globalBootTime.IsZero() {
		return errServerNotInitialized
	}

	// NOTE: this computation assumes that the system time is
	// monotonic, which the time pkg in Go does not guarantee, see
	// https://github.com/golang/go/issues/12914. This is expected
	// to be fixed by go1.9.
	elapsed := time.Now().UTC().Sub(globalBootTime)

	// Replace the entire reply so that only Uptime is populated.
	*reply = UptimeReply{Uptime: elapsed}

	return nil
}
// registerAdminRPCRouter - registers RPC methods for service status,
// stop and restart commands.
func registerAdminRPCRouter(mux *router.Router) error {

View file

@ -124,9 +124,13 @@ var (
// Global server's network statistics
globalConnStats = newConnStats()
// Global HTTP request statistics
globalHTTPStats = newHTTPStats()
// Time when object layer was initialized on start up.
globalBootTime time.Time
// Add new variable global values here.
)

View file

@ -25,6 +25,7 @@ import (
"sort"
"strconv"
"strings"
"time"
"runtime"
@ -465,6 +466,9 @@ func serverMain(c *cli.Context) {
globalObjectAPI = newObject
globalObjLayerMutex.Unlock()
// Set startup time
globalBootTime = time.Now().UTC()
// Prints the formatted startup message once object layer is initialized.
printStartupMessage(apiEndPoints)

View file

@ -25,6 +25,7 @@ import (
"io/ioutil"
"net/http"
"net/url"
"time"
)
// BackendType - represents different backend types.
@ -70,6 +71,7 @@ type ServerVersion struct {
type ServiceStatusMetadata struct {
	// Storage backend information; maps to the "storageInfo" JSON key.
	StorageInfo StorageInfo `json:"storageInfo"`

	// Server version details; maps to the "serverVersion" JSON key.
	ServerVersion ServerVersion `json:"serverVersion"`

	// Uptime as a time.Duration; maps to the "uptime" JSON key.
	Uptime time.Duration `json:"uptime"`
}
// ServiceStatus - Connect to a minio server and call Service Status Management API