tooling: Add xl-meta --combine switch that will combine inline data (#17488)

This will combine the data — or write whatever partial data is available — for each version found in the inspect data.

Example:

```
> xl-meta -export -combine inspect-data.1228fb52.zip

(... metadata json...)
}
Attempting to combine version "994f1113-da94-4be1-8551-9dbc54b204bc".
Read shard 1 Data shards 9 Parity 4 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-01-of-13.data)
Read shard 2 Data shards 9 Parity 4 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-02-of-13.data)
Read shard 3 Data shards 9 Parity 4 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-03-of-13.data)
Read shard 4 Data shards 9 Parity 4 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-04-of-13.data)
Read shard 6 Data shards 9 Parity 4 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-06-of-13.data)
Read shard 7 Data shards 9 Parity 4 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-07-of-13.data)
Read shard 8 Data shards 8 Parity 5 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-08-of-13.data)
Read shard 9 Data shards 8 Parity 5 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-09-of-13.data)
Read shard 10 Data shards 8 Parity 5 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-10-of-13.data)
Read shard 11 Data shards 8 Parity 5 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-11-of-13.data)
Read shard 13 Data shards 8 Parity 5 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-13-of-13.data)
Attempting to reconstruct using parity sets:
* Setup: Data shards: 9 - Parity blocks: 6
Have 6 complete remapped data shards and 6 complete parity shards. Could NOT reconstruct: too few shards given
* Setup: Data shards: 8 - Parity blocks: 5
Have 5 complete remapped data shards and 5 complete parity shards. Could reconstruct completely
0 bytes missing. Truncating 0 from the end.
Wrote output to 994f1113-da94-4be1-8551-9dbc54b204bc.complete
```

So far only inline data is supported, but there is no real reason that external data can't also be included with some handling of blocks.

Supports only unencrypted data.
This commit is contained in:
Klaus Post 2023-06-22 12:41:24 -07:00 committed by GitHub
parent 74759b05a5
commit 15daa2e74a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -27,11 +27,15 @@ import (
"io"
"log"
"os"
"path/filepath"
"sort"
"strings"
"time"
"github.com/google/uuid"
"github.com/klauspost/compress/zip"
"github.com/klauspost/filepathx"
"github.com/klauspost/reedsolomon"
"github.com/minio/cli"
"github.com/tinylib/msgp/msgp"
)
@ -74,10 +78,21 @@ FLAGS:
Usage: "export inline data",
Name: "export",
},
cli.BoolFlag{
Usage: "combine inline data",
Name: "combine",
},
}
app.Action = func(c *cli.Context) error {
ndjson := c.Bool("ndjson")
if c.Bool("data") && c.Bool("combine") {
return errors.New("cannot combine --data and --combine")
}
// file / version / file
filemap := make(map[string]map[string]string)
// versionID ->
combineFiles := make(map[string][]string)
decode := func(r io.Reader, file string) ([]byte, error) {
b, err := io.ReadAll(r)
if err != nil {
@ -87,7 +102,7 @@ FLAGS:
if err != nil {
return nil, err
}
filemap[file] = make(map[string]string)
buf := bytes.NewBuffer(nil)
var data xlMetaInlineData
switch minor {
@ -149,6 +164,21 @@ FLAGS:
Header: b,
Metadata: buf.Bytes(),
}
type erasureInfo struct {
V2Obj *struct {
EcDist []int
EcIndex int
EcM int
EcN int
}
}
var ei erasureInfo
if err := json.Unmarshal(buf.Bytes(), &ei); err == nil && ei.V2Obj != nil {
verID := uuid.UUID(header.VersionID).String()
idx := ei.V2Obj.EcIndex
filemap[file][verID] = fmt.Sprintf("%s/shard-%02d-of-%02d", verID, idx, ei.V2Obj.EcN+ei.V2Obj.EcM)
filemap[file][verID+".json"] = buf.String()
}
return nil
})
if err != nil {
@ -173,22 +203,39 @@ FLAGS:
buf = bytes.NewBuffer(b)
}
if c.Bool("export") {
file := strings.Map(func(r rune) rune {
switch {
case r >= 'a' && r <= 'z':
return r
case r >= 'A' && r <= 'Z':
return r
case r >= '0' && r <= '9':
return r
case strings.ContainsAny(string(r), "+=-_()!@."):
return r
default:
return '_'
}
}, file)
file := file
if !c.Bool("combine") {
file = strings.Map(func(r rune) rune {
switch {
case r >= 'a' && r <= 'z':
return r
case r >= 'A' && r <= 'Z':
return r
case r >= '0' && r <= '9':
return r
case strings.ContainsAny(string(r), "+=-_()!@."):
return r
default:
return '_'
}
}, file)
}
err := data.files(func(name string, data []byte) {
err = os.WriteFile(fmt.Sprintf("%s-%s.data", file, name), data, os.ModePerm)
fn := fmt.Sprintf("%s-%s.data", file, name)
if c.Bool("combine") {
f := filemap[file][name]
if f != "" {
fn = f + ".data"
os.MkdirAll(filepath.Dir(fn), os.ModePerm)
err = os.WriteFile(fn+".json", []byte(filemap[file][name+".json"]), os.ModePerm)
combineFiles[name] = append(combineFiles[name], fn)
if err != nil {
fmt.Println("ERR:", err)
}
_ = os.WriteFile(filepath.Dir(fn)+"/filename.txt", []byte(file), os.ModePerm)
}
}
err = os.WriteFile(fn, data, os.ModePerm)
if err != nil {
fmt.Println(err)
}
@ -312,6 +359,13 @@ FLAGS:
if multiple {
fmt.Println("}")
}
if len(combineFiles) > 0 {
for k, v := range combineFiles {
if err := combine(v, k); err != nil {
fmt.Println("ERROR:", err)
}
}
}
return nil
}
@ -587,3 +641,154 @@ func (z xlMetaV2VersionHeaderV2) MarshalJSON() (o []byte, err error) {
}
return json.Marshal(tmp)
}
// combine reassembles the inline data of a single object version from the
// per-shard files collected during export (one ".data" file per drive, each
// accompanied by a ".json" metadata sidecar).
//
// Each sidecar supplies the shard's 1-based erasure index (EcIndex), the
// data/parity split (EcM/EcN) and the total object size. Shard payloads are
// copied into their slot of a flat buffer, and a parallel "filled" buffer
// marks every byte recovered so far. If bytes remain missing afterwards,
// Reed-Solomon reconstruction is attempted once per distinct data-shard
// count observed.
//
// The result is written to out+".complete" (fully recovered) or
// out+".truncated" (gaps remain). An error is returned for unreadable
// files, malformed metadata, or shards that disagree on the object size.
func combine(files []string, out string) error {
	// Deterministic processing order regardless of map/glob ordering upstream.
	sort.Strings(files)
	var size, shards, data, parity int
	// mapped holds the reassembled payload; filled marks each recovered byte
	// with a 1. Both are re-allocated once the real size is known below
	// (size is still 0 here).
	mapped := make([]byte, size)
	filled := make([]byte, size)
	// parityData: data-shard count -> 0-based shard index -> shard payload.
	// Keyed by data-shard count because shards of one version can report
	// different erasure setups (e.g. 9+4 and 8+5 in the same object).
	parityData := make(map[int]map[int][]byte)
	fmt.Printf("Attempting to combine version %q.\n", out)
	for _, file := range files {
		b, err := os.ReadFile(file)
		if err != nil {
			return err
		}
		// Metadata sidecar written next to the shard during export.
		meta, err := os.ReadFile(file + ".json")
		if err != nil {
			return err
		}
		// Minimal view of the erasure fields needed from the xl.meta JSON.
		type erasureInfo struct {
			V2Obj *struct {
				EcDist  []int
				EcIndex int
				EcM     int
				EcN     int
				Size    int
			}
		}
		var ei erasureInfo
		var idx int
		if err := json.Unmarshal(meta, &ei); err == nil && ei.V2Obj != nil {
			// The first shard fixes the object size and sizes the buffers.
			if size == 0 {
				size = ei.V2Obj.Size
				mapped = make([]byte, size)
				filled = make([]byte, size)
			}
			data = ei.V2Obj.EcM
			parity = ei.V2Obj.EcN
			// shards is fixed from the first file; later files are assumed
			// to share the same data+parity total even if the split differs.
			if shards == 0 {
				shards = data + parity
			}
			// Convert 1-based EcIndex to a 0-based slot.
			idx = ei.V2Obj.EcIndex - 1
			fmt.Println("Read shard", ei.V2Obj.EcIndex, "Data shards", data, "Parity", parity, fmt.Sprintf("(%s)", file))
			// Every shard must agree on the total object size.
			if ei.V2Obj.Size != size {
				return fmt.Errorf("size mismatch. Meta size: %d", ei.V2Obj.Size)
			}
		} else {
			return err
		}
		if len(b) < 32 {
			return fmt.Errorf("file %s too short", file)
		}
		// Trim hash. Fine for inline data, since only one block.
		b = b[32:]
		// Register the payload under its erasure setup for possible
		// reconstruction later.
		set := parityData[data]
		if set == nil {
			set = make(map[int][]byte)
		}
		set[idx] = b
		parityData[data] = set
		// Combine
		start := len(b) * idx
		if start >= len(mapped) {
			// Slot maps past the end of the object (typically a parity
			// shard) — nothing to place directly.
			continue
		}
		copy(mapped[start:], b)
		for j := range b {
			if j+start >= len(filled) {
				break
			}
			filled[j+start] = 1
		}
	}
	// Locate the last recovered byte and count the gaps.
	lastValid := 0
	missing := 0
	for i := range filled {
		if filled[i] == 1 {
			lastValid = i
		} else {
			missing++
		}
	}
	if missing > 0 && len(parityData) > 0 {
		fmt.Println("Attempting to reconstruct using parity sets:")
		// Try each erasure setup until nothing is missing.
		for k, v := range parityData {
			if missing == 0 {
				break
			}
			fmt.Println("* Setup: Data shards:", k, "- Parity blocks:", len(v))
			rs, err := reedsolomon.New(k, shards-k)
			if err != nil {
				return err
			}
			split, err := rs.Split(mapped)
			if err != nil {
				return err
			}
			// Split the fill-markers the same way, so a data shard with
			// any missing byte can be detected below.
			splitFilled, err := rs.Split(filled)
			if err != nil {
				return err
			}
			ok := len(splitFilled)
			for i, sh := range splitFilled {
				for _, v := range sh {
					if v == 0 {
						// Incomplete shard: mark it absent so the decoder
						// is allowed to regenerate it.
						split[i] = nil
						ok--
						break
					}
				}
			}
			// Drop in the shards that were read for this setup; indices
			// >= k are parity shards.
			hasParity := 0
			for idx, sh := range v {
				split[idx] = sh
				if idx >= k && len(v) > 0 {
					hasParity++
				}
			}
			fmt.Printf("Have %d complete remapped data shards and %d complete parity shards. ", ok, hasParity)
			if err := rs.ReconstructData(split); err == nil {
				fmt.Println("Could reconstruct completely")
				// Copy the reconstructed data shards back into the flat buffer.
				for i, data := range split[:k] {
					start := i * len(data)
					copy(mapped[start:], data)
				}
				lastValid = size - 1
				missing = 0
			} else {
				fmt.Println("Could NOT reconstruct:", err)
			}
		}
	}
	if lastValid == 0 {
		return errors.New("no valid data found")
	}
	// Output name reflects whether the payload was fully recovered.
	if missing > 0 {
		out += ".truncated"
	} else {
		out += ".complete"
	}
	fmt.Println(missing, "bytes missing. Truncating", len(filled)-lastValid-1, "from end.")
	// Drop anything beyond the last recovered byte.
	mapped = mapped[:lastValid+1]
	err := os.WriteFile(out, mapped, os.ModePerm)
	if err != nil {
		return err
	}
	fmt.Println("Wrote output to", out)
	return nil
}