fix: In federated setup dial all hosts to figure out online host (#10074)

In federated NAS gateway setups, one of the multiple hosts in
srvRecords was picked at random, which meant that if one of the
hosts was down the request could fail, even though it might
succeed if the client retried. Instead, allow the server to
quickly figure out which host is currently online, so that we
can exclude any host which is down.

Each attempt to detect a downed node takes at most
300 milliseconds; if a node takes longer than this to respond
we simply ignore it and move on to the next node. The total
number of attempts is equal to the number of srvRecords; if no
server is online we simply fall back to the last dialed host.
This commit is contained in:
Harshavardhana 2020-07-17 14:25:47 -07:00 committed by GitHub
parent 14b1c9f8e4
commit 3fe27c8411
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 36 additions and 16 deletions

View file

@ -346,18 +346,36 @@ func isMinioReservedBucket(bucketName string) bool {
// returns a slice of hosts by reading a slice of DNS records
func getHostsSlice(records []dns.SrvRecord) []string {
var hosts []string
for _, r := range records {
hosts = append(hosts, net.JoinHostPort(r.Host, string(r.Port)))
hosts := make([]string, len(records))
for i, r := range records {
hosts[i] = net.JoinHostPort(r.Host, string(r.Port))
}
return hosts
}
// returns a host (and corresponding port) from a slice of DNS records
func getHostFromSrv(records []dns.SrvRecord) string {
rand.Seed(time.Now().Unix())
srvRecord := records[rand.Intn(len(records))]
return net.JoinHostPort(srvRecord.Host, string(srvRecord.Port))
var rng = rand.New(rand.NewSource(time.Now().UTC().UnixNano()))
// getHostFromSrv returns an online host (and corresponding port) from a
// slice of DNS records.
//
// Every host is dialed at most once, in random order, with a budget of
// 300ms per attempt; the first host that accepts a TCP connection is
// returned. If no host can be reached the last dialed host is returned,
// and if records is empty the result is the empty string.
func getHostFromSrv(records []dns.SrvRecord) (host string) {
	hosts := getHostsSlice(records)

	// Use a generator private to this call: a *rand.Rand built with
	// rand.New is not safe for concurrent use, and this function may be
	// called from concurrent request handlers. A permutation (instead of
	// repeated random picks) guarantees that a downed host is never dialed
	// twice while an online one is left untried.
	order := rand.New(rand.NewSource(time.Now().UTC().UnixNano())).Perm(len(hosts))

	var d net.Dialer
	for _, idx := range order {
		host = hosts[idx]

		ctx, cancel := context.WithTimeout(GlobalContext, 300*time.Millisecond)
		conn, err := d.DialContext(ctx, "tcp", host)
		cancel()
		if err != nil {
			// Unreachable or too slow to answer, try the next candidate.
			continue
		}

		// The connection was only a liveness probe, it carries no data.
		conn.Close()
		return host
	}

	// Either there were no hosts at all, or none of them answered: fall
	// back to the last host that was dialed.
	return host
}
// IsCompressed returns true if the object is marked as compressed.

View file

@ -721,6 +721,11 @@ var getRemoteInstanceTransportOnce sync.Once
// Returns a minio-go Client configured to access remote host described by destDNSRecord
// Applicable only in a federated deployment
var getRemoteInstanceClient = func(r *http.Request, host string) (*miniogo.Core, error) {
getRemoteInstanceTransportOnce.Do(func() {
getRemoteInstanceTransport = NewGatewayHTTPTransport()
getRemoteInstanceTransportLongTO = newGatewayHTTPTransport(time.Hour)
})
cred := getReqAccessCred(r, globalServerRegion)
// In a federated deployment, all the instances share config files
// and hence expected to have same credentials.
@ -732,10 +737,6 @@ var getRemoteInstanceClient = func(r *http.Request, host string) (*miniogo.Core,
if err != nil {
return nil, err
}
getRemoteInstanceTransportOnce.Do(func() {
getRemoteInstanceTransport = NewGatewayHTTPTransport()
getRemoteInstanceTransportLongTO = newGatewayHTTPTransport(time.Hour)
})
return core, nil
}
@ -743,6 +744,11 @@ var getRemoteInstanceClient = func(r *http.Request, host string) (*miniogo.Core,
// Applicable only in a federated deployment.
// The transport does not contain any timeout except for dialing.
func getRemoteInstanceClientLongTimeout(r *http.Request, host string) (*miniogo.Core, error) {
getRemoteInstanceTransportOnce.Do(func() {
getRemoteInstanceTransport = NewGatewayHTTPTransport()
getRemoteInstanceTransportLongTO = newGatewayHTTPTransport(time.Hour)
})
cred := getReqAccessCred(r, globalServerRegion)
// In a federated deployment, all the instances share config files
// and hence expected to have same credentials.
@ -754,10 +760,6 @@ func getRemoteInstanceClientLongTimeout(r *http.Request, host string) (*miniogo.
if err != nil {
return nil, err
}
getRemoteInstanceTransportOnce.Do(func() {
getRemoteInstanceTransport = NewGatewayHTTPTransport()
getRemoteInstanceTransportLongTO = newGatewayHTTPTransport(time.Hour)
})
return core, nil
}