// Copyright (c) 2015-2021 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

// Package etag provides an implementation of S3 ETags.
//
// Each S3 object has an associated ETag that can be
// used to e.g. quickly compare objects or check whether
// the content of an object has changed.
//
// In general, an S3 ETag is an MD5 checksum of the object
// content. However, there are many exceptions to this rule.
//
//
// Single-part Upload
//
// In case of a basic single-part PUT operation - without server
// side encryption or object compression - the ETag of an object
// is its content MD5.
//
//
// Multi-part Upload
//
// The ETag of an object does not correspond to its content MD5
// when the object is uploaded in multiple parts via the S3
// multipart API. Instead, S3 first computes an MD5 of each part:
//   e1 := MD5(part-1)
//   e2 := MD5(part-2)
//   ...
//   eN := MD5(part-N)
//
// Then, the ETag of the object is computed as MD5 of all individual
// part checksums. S3 also encodes the number of parts into the ETag
// by appending a -<number-of-parts> at the end:
//   ETag := MD5(e1 || e2 || e3 ... || eN) || -N
//
//   For example: ceb8853ddc5086cc4ab9e149f8f09c88-5
//
// However, this scheme is only used for multipart objects that are
// not encrypted.
//
// Server-side Encryption
//
// S3 specifies three types of server-side-encryption - SSE-C, SSE-S3
// and SSE-KMS - with different semantics w.r.t. ETags.
// In case of SSE-S3, the ETag of an object is computed the same way
// as for singlepart or multipart plaintext objects, respectively.
// In particular, the ETag of a singlepart SSE-S3 object is its
// content MD5.
//
// In case of SSE-C and SSE-KMS, the ETag of an object is computed
// differently. For singlepart uploads the ETag is not the content
// MD5 of the object. For multipart uploads the ETag is also not
// the MD5 of the individual part checksums but it still contains
// the number of parts as suffix.
//
// Instead, the ETag is kind of unpredictable for S3 clients when
// an object is encrypted using SSE-C or SSE-KMS. Maybe AWS S3
// computes the ETag as MD5 of the encrypted content but there is
// no way to verify this assumption since the encryption happens
// inside AWS S3.
// Therefore, S3 clients must not make any assumption about ETags
// in case of SSE-C or SSE-KMS except that the ETag is well-formed.
//
// To put all of this into a simple rule:
//   SSE-S3 : ETag == MD5
//   SSE-C  : ETag != MD5
//   SSE-KMS: ETag != MD5
//
//
// Encrypted ETags
//
// An S3 implementation has to remember the content MD5 of objects
// in case of SSE-S3. However, storing the ETag of an encrypted
// object in plaintext may reveal some information about the object.
// For example, two objects with the same ETag are identical with
// a very high probability.
//
// Therefore, an S3 implementation may encrypt an ETag before storing
// it. In this case, the stored ETag may not be a well-formed S3 ETag.
// For example, it can be larger due to a checksum added by authenticated
// encryption schemes. Such an ETag must be decrypted before it is sent
// to an S3 client.
//
//
// S3 Clients
//
// There are many different S3 client implementations. Most of them
// access the ETag by looking for the HTTP response header key "Etag".
// However, some of them assume that the header key has to be "ETag" // (case-sensitive) and will fail otherwise. // Further, some clients require that the ETag value is a double-quoted // string. Therefore, this package provides dedicated functions for // adding and extracing the ETag to/from HTTP headers. package etag import ( "bytes" "crypto/md5" "encoding/base64" "encoding/hex" "errors" "fmt" "net/http" "strconv" "strings" ) // ETag is a single S3 ETag. // // An S3 ETag sometimes corresponds to the MD5 of // the S3 object content. However, when an object // is encrypted, compressed or uploaded using // the S3 multipart API then its ETag is not // necessarily the MD5 of the object content. // // For a more detailed description of S3 ETags // take a look at the package documentation. type ETag []byte // String returns the string representation of the ETag. // // The returned string is a hex representation of the // binary ETag with an optional '-' suffix. func (e ETag) String() string { if e.IsMultipart() { return hex.EncodeToString(e[:16]) + string(e[16:]) } return hex.EncodeToString(e) } // IsEncrypted reports whether the ETag is encrypted. func (e ETag) IsEncrypted() bool { return len(e) > 16 && !bytes.ContainsRune(e, '-') } // IsMultipart reports whether the ETag belongs to an // object that has been uploaded using the S3 multipart // API. // An S3 multipart ETag has a - suffix. func (e ETag) IsMultipart() bool { return len(e) > 16 && bytes.ContainsRune(e, '-') } // Parts returns the number of object parts that are // referenced by this ETag. It returns 1 if the object // has been uploaded using the S3 singlepart API. // // Parts may panic if the ETag is an invalid multipart // ETag. func (e ETag) Parts() int { if !e.IsMultipart() { return 1 } n := bytes.IndexRune(e, '-') parts, err := strconv.Atoi(string(e[n+1:])) if err != nil { panic(err) // malformed ETag } return parts } var _ Tagger = ETag{} // compiler check // ETag returns the ETag itself. 
// // By providing this method ETag implements // the Tagger interface. func (e ETag) ETag() ETag { return e } // FromContentMD5 decodes and returns the Content-MD5 // as ETag, if set. If no Content-MD5 header is set // it returns an empty ETag and no error. func FromContentMD5(h http.Header) (ETag, error) { v, ok := h["Content-Md5"] if !ok { return nil, nil } if v[0] == "" { return nil, errors.New("etag: content-md5 is set but contains no value") } b, err := base64.StdEncoding.Strict().DecodeString(v[0]) if err != nil { return nil, err } if len(b) != md5.Size { return nil, errors.New("etag: invalid content-md5") } return ETag(b), nil } // Multipart computes an S3 multipart ETag given a list of // S3 singlepart ETags. It returns nil if the list of // ETags is empty. // // Any encrypted or multipart ETag will be ignored and not // used to compute the returned ETag. func Multipart(etags ...ETag) ETag { if len(etags) == 0 { return nil } var n int64 h := md5.New() for _, etag := range etags { if !etag.IsMultipart() && !etag.IsEncrypted() { h.Write(etag) n++ } } etag := append(h.Sum(nil), '-') return strconv.AppendInt(etag, n, 10) } // Set adds the ETag to the HTTP headers. It overwrites any // existing ETag entry. // // Due to legacy S3 clients, that make incorrect assumptions // about HTTP headers, Set should be used instead of // http.Header.Set(...). Otherwise, some S3 clients will not // able to extract the ETag. func Set(etag ETag, h http.Header) { // Some (broken) S3 clients expect the ETag header to // literally "ETag" - not "Etag". Further, some clients // expect an ETag in double quotes. Therefore, we set the // ETag directly as map entry instead of using http.Header.Set h["ETag"] = []string{`"` + etag.String() + `"`} } // Get extracts and parses an ETag from the given HTTP headers. // It returns an error when the HTTP headers do not contain // an ETag entry or when the ETag is malformed. // // Get only accepts AWS S3 compatible ETags - i.e. 
no // encrypted ETags - and therefore is stricter than Parse. func Get(h http.Header) (ETag, error) { const strict = true if v := h.Get("Etag"); v != "" { return parse(v, strict) } v, ok := h["ETag"] if !ok || len(v) == 0 { return nil, errors.New("etag: HTTP header does not contain an ETag") } return parse(v[0], strict) } // Equal returns true if and only if the two ETags are // identical. func Equal(a, b ETag) bool { return bytes.Equal(a, b) } // Parse parses s as an S3 ETag, returning the result. // The string can be an encrypted, singlepart // or multipart S3 ETag. It returns an error if s is // not a valid textual representation of an ETag. func Parse(s string) (ETag, error) { const strict = false return parse(s, strict) } // parse parse s as an S3 ETag, returning the result. // It operates in one of two modes: // - strict // - non-strict // // In strict mode, parse only accepts ETags that // are AWS S3 compatible. In particular, an AWS // S3 ETag always consists of a 128 bit checksum // value and an optional - suffix. // Therefore, s must have the following form in // strict mode: <32-hex-characters>[-] // // In non-strict mode, parse also accepts ETags // that are not AWS S3 compatible - e.g. encrypted // ETags. func parse(s string, strict bool) (ETag, error) { // An S3 ETag may be a double-quoted string. // Therefore, we remove double quotes at the // start and end, if any. if strings.HasPrefix(s, `"`) && strings.HasSuffix(s, `"`) { s = s[1 : len(s)-1] } // An S3 ETag may be a multipart ETag that // contains a '-' followed by a number. // If the ETag does not a '-' is is either // a singlepart or encrypted ETag. 
n := strings.IndexRune(s, '-') if n == -1 { etag, err := hex.DecodeString(s) if err != nil { return nil, err } if strict && len(etag) != 16 { // AWS S3 ETags are always 128 bit long return nil, fmt.Errorf("etag: invalid length %d", len(etag)) } return ETag(etag), nil } prefix, suffix := s[:n], s[n:] if len(prefix) != 32 { return nil, fmt.Errorf("etag: invalid prefix length %d", len(prefix)) } if len(suffix) <= 1 { return nil, errors.New("etag: suffix is not a part number") } etag, err := hex.DecodeString(prefix) if err != nil { return nil, err } partNumber, err := strconv.Atoi(suffix[1:]) // suffix[0] == '-' Therefore, we start parsing at suffix[1] if err != nil { return nil, err } if strict && (partNumber == 0 || partNumber > 10000) { return nil, fmt.Errorf("etag: invalid part number %d", partNumber) } return ETag(append(etag, suffix...)), nil }