file-store/pkg/erasureencode/encode.go
2022-08-23 21:56:32 -05:00

109 lines
2.7 KiB
Go

package erasureencode
import (
"fmt"
"io"
"os"
"github.com/klauspost/reedsolomon"
)
// EncodeFile erasure-encodes the contents of file into oututs.
//
// The input length is taken from file.Stat and recorded, together with
// stride, shards and parity, in a freshly allocated EEMeta that is then handed
// to Encode. On success that EEMeta is returned. If Stat or Encode fails, the
// error is returned as-is and no metadata is returned.
func EncodeFile(file *os.File, oututs []io.Writer, stride int32, shards, parity uint16) (*EEMeta, error) {
	info, statErr := file.Stat()
	if statErr != nil {
		return nil, statErr
	}

	params := Params{
		Size:   uint64(info.Size()),
		Stride: stride,
		Shards: shards,
		Parity: parity,
	}
	meta := &EEMeta{Params: params}

	encErr := Encode(file, oututs, meta)
	if encErr != nil {
		return nil, encErr
	}
	return meta, nil
}
// Encode reads data chunks from file according to meta.Params.Plan, adds
// Reed-Solomon parity to every stripe, and writes the stripe to outputs.
//
// outputs must contain exactly Shards+Parity writers; output i receives
// chunk i of each stripe (data chunks first, parity chunks after them).
//
// While encoding, the hash of every data chunk read is appended to
// meta.ShardHashes and writeChunks appends the parity hashes to
// meta.ParityHashes. Once all stripes are written, meta.ShardMerkle and
// meta.ParityMerkle are computed from those lists.
//
// Errors from reedsolomon.New, io.ReadFull and writeChunks are returned
// unchanged. meta may already have been partially updated when an error is
// returned.
func Encode(file io.Reader, outputs []io.Writer, meta *EEMeta) error {
	p := &meta.Params

	// Both counts are uint16, so their sum cannot overflow int.
	if len(outputs) != int(p.Shards)+int(p.Parity) {
		return fmt.Errorf("expected the number of shards+parity to equal the number of output files provided")
	}

	// The stride is passed to the encoder as its shard-size hint for tuning
	// the number of goroutines.
	codec, err := reedsolomon.New(
		int(p.Shards),
		int(p.Parity),
		reedsolomon.WithAutoGoroutines(int(p.Stride)),
	)
	if err != nil {
		return err
	}

	dataCount := uint64(p.Shards)
	total := dataCount + uint64(p.Parity)
	finalData := dataCount - 1 // position of the last data chunk in a stripe

	stripe := make([][]byte, total)
	stripe[0] = []byte{}

	// pending is true while the current stripe has not been written out. It
	// starts out true, so the flush after the loop also runs when the plan
	// yields no chunks at all.
	pending := true
	for idx, csm := range p.Plan(0, p.Size) {
		pos := uint64(idx) % dataCount
		pending = true

		// A new stripe begins: allocate fresh buffers for every chunk. The
		// chunk size only meaningfully changes at a stripe boundary.
		if pos == 0 {
			for i := range stripe {
				stripe[i] = make([]byte, csm.Size)
			}
		}

		// Fill this chunk from the input and record its hash.
		buf := stripe[csm.Chunk]
		if _, err := io.ReadFull(file, buf[:csm.Size]); err != nil {
			return err
		}
		meta.ShardHashes = append(meta.ShardHashes, sha256sum(buf))

		if pos != finalData {
			continue
		}

		// Last data chunk of the stripe: compute parity and write it all out.
		if err := writeChunks(stripe, outputs, codec, meta, dataCount, total); err != nil {
			return err
		}
		pending = false
	}

	// Flush a stripe that ended before its last data chunk was reached.
	if pending {
		if err := writeChunks(stripe, outputs, codec, meta, dataCount, total); err != nil {
			return err
		}
	}

	meta.ShardMerkle = merkleSha256(meta.ShardHashes)
	meta.ParityMerkle = merkleSha256(meta.ParityHashes)
	return nil
}
// writeChunks completes one stripe and writes it out.
//
// It runs enc.Encode over data, appends the hash of each chunk in
// data[shards:endShards] to meta.ParityHashes, and then writes data[i] to
// files[i] for every chunk. The first error from Encode or from a Write is
// returned immediately; parity hashes appended before a failed Write remain
// in meta.
func writeChunks(data [][]byte, files []io.Writer, enc reedsolomon.Encoder, meta *EEMeta, shards, endShards uint64) error {
	encodeErr := enc.Encode(data)
	if encodeErr != nil {
		return encodeErr
	}

	// Record hashes for the chunks in the parity index range.
	idx := shards
	for idx < endShards {
		meta.ParityHashes = append(meta.ParityHashes, sha256sum(data[idx]))
		idx++
	}

	// Every chunk goes to the writer with the same index.
	for i, chunk := range data {
		_, writeErr := files[i].Write(chunk)
		if writeErr != nil {
			return writeErr
		}
	}
	return nil
}