chore_: add `klauspost/reedsolomon` module

2024-04-04 17:52:49 +02:00 · 2024-04-04 17:52:49 +02:00 · 4f2adc0ced
parent bd91f5ab49
commit 4f2adc0ced
398 changed files with 216863 additions and 3038 deletions
--- a/go.mod
+++ b/go.mod
@ -84,6 +84,7 @@ require (
 	github.com/ipfs/go-log/v2 v2.5.1
 	github.com/jellydator/ttlcache/v3 v3.2.0
 	github.com/jmoiron/sqlx v1.3.5
+	github.com/klauspost/reedsolomon v1.12.1
 	github.com/ladydascalie/currency v1.6.0
 	github.com/meirf/gopart v0.0.0-20180520194036-37e9492a85a8
 	github.com/mutecomm/go-sqlcipher/v4 v4.4.2
@ -177,7 +178,7 @@ require (
 	github.com/jackpal/go-nat-pmp v1.0.2 // indirect
 	github.com/jbenet/go-temp-err-catcher v0.1.0 // indirect
 	github.com/klauspost/compress v1.16.7 // indirect
-	github.com/klauspost/cpuid/v2 v2.2.5 // indirect
+	github.com/klauspost/cpuid/v2 v2.2.7 // indirect
 	github.com/koron/go-ssdp v0.0.4 // indirect
 	github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
 	github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
@ -279,7 +280,7 @@ require (
 	go.uber.org/fx v1.20.0 // indirect
 	golang.org/x/mod v0.12.0 // indirect
 	golang.org/x/sync v0.3.0 // indirect
-	golang.org/x/sys v0.11.0 // indirect
+	golang.org/x/sys v0.18.0 // indirect
 	golang.org/x/term v0.11.0 // indirect
 	golang.org/x/tools v0.12.1-0.20230818130535-1517d1a3ba60 // indirect
 	golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f // indirect
--- a/go.sum
+++ b/go.sum
@ -1295,10 +1295,12 @@ github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQs
 github.com/klauspost/cpuid v0.0.0-20170728055534-ae7887de9fa5/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
 github.com/klauspost/cpuid/v2 v2.0.4/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
 github.com/klauspost/cpuid/v2 v2.0.6/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
-github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg=
-github.com/klauspost/cpuid/v2 v2.2.5/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
+github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM=
+github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
 github.com/klauspost/crc32 v0.0.0-20161016154125-cb6bfca970f6/go.mod h1:+ZoRqAPRLkC4NPOvfYeR5KNOrY6TD+/sAC3HXPZgDYg=
 github.com/klauspost/pgzip v1.0.2-0.20170402124221-0bf5dcad4ada/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
+github.com/klauspost/reedsolomon v1.12.1 h1:NhWgum1efX1x58daOBGCFWcxtEhOhXKKl1HAPQUp03Q=
+github.com/klauspost/reedsolomon v1.12.1/go.mod h1:nEi5Kjb6QqtbofI6s+cbG/j1da11c96IBYBSnVGtuBs=
 github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
 github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
 github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
@ -2637,8 +2639,8 @@ golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBc
 golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM=
-golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4=
+golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
 golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
--- a/vendor/github.com/klauspost/cpuid/v2/README.md
+++ b/vendor/github.com/klauspost/cpuid/v2/README.md
@ -9,10 +9,7 @@ You can access the CPU information by accessing the shared CPU variable of the c
 Package home: https://github.com/klauspost/cpuid

 [![PkgGoDev](https://pkg.go.dev/badge/github.com/klauspost/cpuid)](https://pkg.go.dev/github.com/klauspost/cpuid/v2)
-[![Build Status][3]][4]
-
-[3]: https://travis-ci.org/klauspost/cpuid.svg?branch=master
-[4]: https://travis-ci.org/klauspost/cpuid
+[![Go](https://github.com/klauspost/cpuid/actions/workflows/go.yml/badge.svg)](https://github.com/klauspost/cpuid/actions/workflows/go.yml)

 ## installing

@ -285,7 +282,12 @@ Exit Code 1
 | AMXINT8            | Tile computational operations on 8-bit integers                                                                                                                                    |
 | AMXFP16            | Tile computational operations on FP16 numbers                                                                                                                                      |
 | AMXTILE            | Tile architecture                                                                                                                                                                  |
+| APX_F              | Intel APX                                                                                                                                                                          |
 | AVX                | AVX functions                                                                                                                                                                      |
+| AVX10              | If set the Intel AVX10 Converged Vector ISA is supported                                                                                                                           |
+| AVX10_128          | If set indicates that AVX10 128-bit vector support is present                                                                                                                      |
+| AVX10_256          | If set indicates that AVX10 256-bit vector support is present                                                                                                                      |
+| AVX10_512          | If set indicates that AVX10 512-bit vector support is present                                                                                                                      |
 | AVX2               | AVX2 functions                                                                                                                                                                     |
 | AVX512BF16         | AVX-512 BFLOAT16 Instructions                                                                                                                                                      |
 | AVX512BITALG       | AVX-512 Bit Algorithms                                                                                                                                                             |
@ -365,6 +367,8 @@ Exit Code 1
 | IDPRED_CTRL        | IPRED_DIS                                                                                                                                                                          |
 | INT_WBINVD         | WBINVD/WBNOINVD are interruptible.                                                                                                                                                 |
 | INVLPGB            | NVLPGB and TLBSYNC instruction supported                                                                                                                                           |
+| KEYLOCKER          | Key locker                                                                                                                                                                         |
+| KEYLOCKERW         | Key locker wide                                                                                                                                                                    |
 | LAHF               | LAHF/SAHF in long mode                                                                                                                                                             |
 | LAM                | If set, CPU supports Linear Address Masking                                                                                                                                        |
 | LBRVIRT            | LBR virtualization                                                                                                                                                                 |
@ -380,7 +384,7 @@ Exit Code 1
 | MOVDIRI            | Move Doubleword as Direct Store                                                                                                                                                    |
 | MOVSB_ZL           | Fast Zero-Length MOVSB                                                                                                                                                             |
 | MPX                | Intel MPX (Memory Protection Extensions)                                                                                                                                           |
-| MOVU               | MOVU SSE instructions are more efficient and should be preferred to SSE	MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD       |
+| MOVU               | MOVU SSE instructions are more efficient and should be preferred to SSE	MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD        |
 | MSRIRC             | Instruction Retired Counter MSR available                                                                                                                                          |
 | MSRLIST            | Read/Write List of Model Specific Registers                                                                                                                                        |
 | MSR_PAGEFLUSH      | Page Flush MSR available                                                                                                                                                           |
--- a/vendor/github.com/klauspost/cpuid/v2/cpuid.go
+++ b/vendor/github.com/klauspost/cpuid/v2/cpuid.go
@ -67,188 +67,200 @@ const (
 	// Keep index -1 as unknown
 	UNKNOWN = -1

-	// Add features
-	ADX                FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
-	AESNI                               // Advanced Encryption Standard New Instructions
-	AMD3DNOW                            // AMD 3DNOW
-	AMD3DNOWEXT                         // AMD 3DNowExt
-	AMXBF16                             // Tile computational operations on BFLOAT16 numbers
-	AMXFP16                             // Tile computational operations on FP16 numbers
-	AMXINT8                             // Tile computational operations on 8-bit integers
-	AMXTILE                             // Tile architecture
-	AVX                                 // AVX functions
-	AVX2                                // AVX2 functions
-	AVX512BF16                          // AVX-512 BFLOAT16 Instructions
-	AVX512BITALG                        // AVX-512 Bit Algorithms
-	AVX512BW                            // AVX-512 Byte and Word Instructions
-	AVX512CD                            // AVX-512 Conflict Detection Instructions
-	AVX512DQ                            // AVX-512 Doubleword and Quadword Instructions
-	AVX512ER                            // AVX-512 Exponential and Reciprocal Instructions
-	AVX512F                             // AVX-512 Foundation
-	AVX512FP16                          // AVX-512 FP16 Instructions
-	AVX512IFMA                          // AVX-512 Integer Fused Multiply-Add Instructions
-	AVX512PF                            // AVX-512 Prefetch Instructions
-	AVX512VBMI                          // AVX-512 Vector Bit Manipulation Instructions
-	AVX512VBMI2                         // AVX-512 Vector Bit Manipulation Instructions, Version 2
-	AVX512VL                            // AVX-512 Vector Length Extensions
-	AVX512VNNI                          // AVX-512 Vector Neural Network Instructions
-	AVX512VP2INTERSECT                  // AVX-512 Intersect for D/Q
-	AVX512VPOPCNTDQ                     // AVX-512 Vector Population Count Doubleword and Quadword
-	AVXIFMA                             // AVX-IFMA instructions
-	AVXNECONVERT                        // AVX-NE-CONVERT instructions
-	AVXSLOW                             // Indicates the CPU performs 2 128 bit operations instead of one
-	AVXVNNI                             // AVX (VEX encoded) VNNI neural network instructions
-	AVXVNNIINT8                         // AVX-VNNI-INT8 instructions
-	BHI_CTRL                            // Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598
-	BMI1                                // Bit Manipulation Instruction Set 1
-	BMI2                                // Bit Manipulation Instruction Set 2
-	CETIBT                              // Intel CET Indirect Branch Tracking
-	CETSS                               // Intel CET Shadow Stack
-	CLDEMOTE                            // Cache Line Demote
-	CLMUL                               // Carry-less Multiplication
-	CLZERO                              // CLZERO instruction supported
-	CMOV                                // i686 CMOV
-	CMPCCXADD                           // CMPCCXADD instructions
-	CMPSB_SCADBS_SHORT                  // Fast short CMPSB and SCASB
-	CMPXCHG8                            // CMPXCHG8 instruction
-	CPBOOST                             // Core Performance Boost
-	CPPC                                // AMD: Collaborative Processor Performance Control
-	CX16                                // CMPXCHG16B Instruction
-	EFER_LMSLE_UNS                      // AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ
-	ENQCMD                              // Enqueue Command
-	ERMS                                // Enhanced REP MOVSB/STOSB
-	F16C                                // Half-precision floating-point conversion
-	FLUSH_L1D                           // Flush L1D cache
-	FMA3                                // Intel FMA 3. Does not imply AVX.
-	FMA4                                // Bulldozer FMA4 functions
-	FP128                               // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide
-	FP256                               // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide
-	FSRM                                // Fast Short Rep Mov
-	FXSR                                // FXSAVE, FXRESTOR instructions, CR4 bit 9
-	FXSROPT                             // FXSAVE/FXRSTOR optimizations
-	GFNI                                // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage.
-	HLE                                 // Hardware Lock Elision
-	HRESET                              // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR
-	HTT                                 // Hyperthreading (enabled)
-	HWA                                 // Hardware assert supported. Indicates support for MSRC001_10
-	HYBRID_CPU                          // This part has CPUs of more than one type.
-	HYPERVISOR                          // This bit has been reserved by Intel & AMD for use by hypervisors
-	IA32_ARCH_CAP                       // IA32_ARCH_CAPABILITIES MSR (Intel)
-	IA32_CORE_CAP                       // IA32_CORE_CAPABILITIES MSR
-	IBPB                                // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
-	IBRS                                // AMD: Indirect Branch Restricted Speculation
-	IBRS_PREFERRED                      // AMD: IBRS is preferred over software solution
-	IBRS_PROVIDES_SMP                   // AMD: IBRS provides Same Mode Protection
-	IBS                                 // Instruction Based Sampling (AMD)
-	IBSBRNTRGT                          // Instruction Based Sampling Feature (AMD)
-	IBSFETCHSAM                         // Instruction Based Sampling Feature (AMD)
-	IBSFFV                              // Instruction Based Sampling Feature (AMD)
-	IBSOPCNT                            // Instruction Based Sampling Feature (AMD)
-	IBSOPCNTEXT                         // Instruction Based Sampling Feature (AMD)
-	IBSOPSAM                            // Instruction Based Sampling Feature (AMD)
-	IBSRDWROPCNT                        // Instruction Based Sampling Feature (AMD)
-	IBSRIPINVALIDCHK                    // Instruction Based Sampling Feature (AMD)
-	IBS_FETCH_CTLX                      // AMD: IBS fetch control extended MSR supported
-	IBS_OPDATA4                         // AMD: IBS op data 4 MSR supported
-	IBS_OPFUSE                          // AMD: Indicates support for IbsOpFuse
-	IBS_PREVENTHOST                     // Disallowing IBS use by the host supported
-	IBS_ZEN4                            // AMD: Fetch and Op IBS support IBS extensions added with Zen4
-	IDPRED_CTRL                         // IPRED_DIS
-	INT_WBINVD                          // WBINVD/WBNOINVD are interruptible.
-	INVLPGB                             // NVLPGB and TLBSYNC instruction supported
-	LAHF                                // LAHF/SAHF in long mode
-	LAM                                 // If set, CPU supports Linear Address Masking
-	LBRVIRT                             // LBR virtualization
-	LZCNT                               // LZCNT instruction
-	MCAOVERFLOW                         // MCA overflow recovery support.
-	MCDT_NO                             // Processor do not exhibit MXCSR Configuration Dependent Timing behavior and do not need to mitigate it.
-	MCOMMIT                             // MCOMMIT instruction supported
-	MD_CLEAR                            // VERW clears CPU buffers
-	MMX                                 // standard MMX
-	MMXEXT                              // SSE integer functions or AMD MMX ext
-	MOVBE                               // MOVBE instruction (big-endian)
-	MOVDIR64B                           // Move 64 Bytes as Direct Store
-	MOVDIRI                             // Move Doubleword as Direct Store
-	MOVSB_ZL                            // Fast Zero-Length MOVSB
-	MOVU                                // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE	MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD
-	MPX                                 // Intel MPX (Memory Protection Extensions)
-	MSRIRC                              // Instruction Retired Counter MSR available
-	MSRLIST                             // Read/Write List of Model Specific Registers
-	MSR_PAGEFLUSH                       // Page Flush MSR available
-	NRIPS                               // Indicates support for NRIP save on VMEXIT
-	NX                                  // NX (No-Execute) bit
-	OSXSAVE                             // XSAVE enabled by OS
-	PCONFIG                             // PCONFIG for Intel Multi-Key Total Memory Encryption
-	POPCNT                              // POPCNT instruction
-	PPIN                                // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled
-	PREFETCHI                           // PREFETCHIT0/1 instructions
-	PSFD                                // Predictive Store Forward Disable
-	RDPRU                               // RDPRU instruction supported
-	RDRAND                              // RDRAND instruction is available
-	RDSEED                              // RDSEED instruction is available
-	RDTSCP                              // RDTSCP Instruction
-	RRSBA_CTRL                          // Restricted RSB Alternate
-	RTM                                 // Restricted Transactional Memory
-	RTM_ALWAYS_ABORT                    // Indicates that the loaded microcode is forcing RTM abort.
-	SERIALIZE                           // Serialize Instruction Execution
-	SEV                                 // AMD Secure Encrypted Virtualization supported
-	SEV_64BIT                           // AMD SEV guest execution only allowed from a 64-bit host
-	SEV_ALTERNATIVE                     // AMD SEV Alternate Injection supported
-	SEV_DEBUGSWAP                       // Full debug state swap supported for SEV-ES guests
-	SEV_ES                              // AMD SEV Encrypted State supported
-	SEV_RESTRICTED                      // AMD SEV Restricted Injection supported
-	SEV_SNP                             // AMD SEV Secure Nested Paging supported
-	SGX                                 // Software Guard Extensions
-	SGXLC                               // Software Guard Extensions Launch Control
-	SHA                                 // Intel SHA Extensions
-	SME                                 // AMD Secure Memory Encryption supported
-	SME_COHERENT                        // AMD Hardware cache coherency across encryption domains enforced
-	SPEC_CTRL_SSBD                      // Speculative Store Bypass Disable
-	SRBDS_CTRL                          // SRBDS mitigation MSR available
-	SSE                                 // SSE functions
-	SSE2                                // P4 SSE functions
-	SSE3                                // Prescott SSE3 functions
-	SSE4                                // Penryn SSE4.1 functions
-	SSE42                               // Nehalem SSE4.2 functions
-	SSE4A                               // AMD Barcelona microarchitecture SSE4a instructions
-	SSSE3                               // Conroe SSSE3 functions
-	STIBP                               // Single Thread Indirect Branch Predictors
-	STIBP_ALWAYSON                      // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On
-	STOSB_SHORT                         // Fast short STOSB
-	SUCCOR                              // Software uncorrectable error containment and recovery capability.
-	SVM                                 // AMD Secure Virtual Machine
-	SVMDA                               // Indicates support for the SVM decode assists.
-	SVMFBASID                           // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control
-	SVML                                // AMD SVM lock. Indicates support for SVM-Lock.
-	SVMNP                               // AMD SVM nested paging
-	SVMPF                               // SVM pause intercept filter. Indicates support for the pause intercept filter
-	SVMPFT                              // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold
-	SYSCALL                             // System-Call Extension (SCE): SYSCALL and SYSRET instructions.
-	SYSEE                               // SYSENTER and SYSEXIT instructions
-	TBM                                 // AMD Trailing Bit Manipulation
-	TDX_GUEST                           // Intel Trust Domain Extensions Guest
-	TLB_FLUSH_NESTED                    // AMD: Flushing includes all the nested translations for guest translations
-	TME                                 // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.
-	TOPEXT                              // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
-	TSCRATEMSR                          // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104
-	TSXLDTRK                            // Intel TSX Suspend Load Address Tracking
-	VAES                                // Vector AES. AVX(512) versions requires additional checks.
-	VMCBCLEAN                           // VMCB clean bits. Indicates support for VMCB clean bits.
-	VMPL                                // AMD VM Permission Levels supported
-	VMSA_REGPROT                        // AMD VMSA Register Protection supported
-	VMX                                 // Virtual Machine Extensions
-	VPCLMULQDQ                          // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions.
-	VTE                                 // AMD Virtual Transparent Encryption supported
-	WAITPKG                             // TPAUSE, UMONITOR, UMWAIT
-	WBNOINVD                            // Write Back and Do Not Invalidate Cache
-	WRMSRNS                             // Non-Serializing Write to Model Specific Register
-	X87                                 // FPU
-	XGETBV1                             // Supports XGETBV with ECX = 1
-	XOP                                 // Bulldozer XOP functions
-	XSAVE                               // XSAVE, XRESTOR, XSETBV, XGETBV
-	XSAVEC                              // Supports XSAVEC and the compacted form of XRSTOR.
-	XSAVEOPT                            // XSAVEOPT available
-	XSAVES                              // Supports XSAVES/XRSTORS and IA32_XSS
+	// x86 features
+	ADX                 FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
+	AESNI                                // Advanced Encryption Standard New Instructions
+	AMD3DNOW                             // AMD 3DNOW
+	AMD3DNOWEXT                          // AMD 3DNowExt
+	AMXBF16                              // Tile computational operations on BFLOAT16 numbers
+	AMXFP16                              // Tile computational operations on FP16 numbers
+	AMXINT8                              // Tile computational operations on 8-bit integers
+	AMXTILE                              // Tile architecture
+	APX_F                                // Intel APX
+	AVX                                  // AVX functions
+	AVX10                                // If set the Intel AVX10 Converged Vector ISA is supported
+	AVX10_128                            // If set indicates that AVX10 128-bit vector support is present
+	AVX10_256                            // If set indicates that AVX10 256-bit vector support is present
+	AVX10_512                            // If set indicates that AVX10 512-bit vector support is present
+	AVX2                                 // AVX2 functions
+	AVX512BF16                           // AVX-512 BFLOAT16 Instructions
+	AVX512BITALG                         // AVX-512 Bit Algorithms
+	AVX512BW                             // AVX-512 Byte and Word Instructions
+	AVX512CD                             // AVX-512 Conflict Detection Instructions
+	AVX512DQ                             // AVX-512 Doubleword and Quadword Instructions
+	AVX512ER                             // AVX-512 Exponential and Reciprocal Instructions
+	AVX512F                              // AVX-512 Foundation
+	AVX512FP16                           // AVX-512 FP16 Instructions
+	AVX512IFMA                           // AVX-512 Integer Fused Multiply-Add Instructions
+	AVX512PF                             // AVX-512 Prefetch Instructions
+	AVX512VBMI                           // AVX-512 Vector Bit Manipulation Instructions
+	AVX512VBMI2                          // AVX-512 Vector Bit Manipulation Instructions, Version 2
+	AVX512VL                             // AVX-512 Vector Length Extensions
+	AVX512VNNI                           // AVX-512 Vector Neural Network Instructions
+	AVX512VP2INTERSECT                   // AVX-512 Intersect for D/Q
+	AVX512VPOPCNTDQ                      // AVX-512 Vector Population Count Doubleword and Quadword
+	AVXIFMA                              // AVX-IFMA instructions
+	AVXNECONVERT                         // AVX-NE-CONVERT instructions
+	AVXSLOW                              // Indicates the CPU performs 2 128 bit operations instead of one
+	AVXVNNI                              // AVX (VEX encoded) VNNI neural network instructions
+	AVXVNNIINT8                          // AVX-VNNI-INT8 instructions
+	BHI_CTRL                             // Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598
+	BMI1                                 // Bit Manipulation Instruction Set 1
+	BMI2                                 // Bit Manipulation Instruction Set 2
+	CETIBT                               // Intel CET Indirect Branch Tracking
+	CETSS                                // Intel CET Shadow Stack
+	CLDEMOTE                             // Cache Line Demote
+	CLMUL                                // Carry-less Multiplication
+	CLZERO                               // CLZERO instruction supported
+	CMOV                                 // i686 CMOV
+	CMPCCXADD                            // CMPCCXADD instructions
+	CMPSB_SCADBS_SHORT                   // Fast short CMPSB and SCASB
+	CMPXCHG8                             // CMPXCHG8 instruction
+	CPBOOST                              // Core Performance Boost
+	CPPC                                 // AMD: Collaborative Processor Performance Control
+	CX16                                 // CMPXCHG16B Instruction
+	EFER_LMSLE_UNS                       // AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ
+	ENQCMD                               // Enqueue Command
+	ERMS                                 // Enhanced REP MOVSB/STOSB
+	F16C                                 // Half-precision floating-point conversion
+	FLUSH_L1D                            // Flush L1D cache
+	FMA3                                 // Intel FMA 3. Does not imply AVX.
+	FMA4                                 // Bulldozer FMA4 functions
+	FP128                                // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide
+	FP256                                // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide
+	FSRM                                 // Fast Short Rep Mov
+	FXSR                                 // FXSAVE, FXRESTOR instructions, CR4 bit 9
+	FXSROPT                              // FXSAVE/FXRSTOR optimizations
+	GFNI                                 // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage.
+	HLE                                  // Hardware Lock Elision
+	HRESET                               // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR
+	HTT                                  // Hyperthreading (enabled)
+	HWA                                  // Hardware assert supported. Indicates support for MSRC001_10
+	HYBRID_CPU                           // This part has CPUs of more than one type.
+	HYPERVISOR                           // This bit has been reserved by Intel & AMD for use by hypervisors
+	IA32_ARCH_CAP                        // IA32_ARCH_CAPABILITIES MSR (Intel)
+	IA32_CORE_CAP                        // IA32_CORE_CAPABILITIES MSR
+	IBPB                                 // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
+	IBPB_BRTYPE                          // Indicates that MSR 49h (PRED_CMD) bit 0 (IBPB) flushes	all branch type predictions from the CPU branch predictor
+	IBRS                                 // AMD: Indirect Branch Restricted Speculation
+	IBRS_PREFERRED                       // AMD: IBRS is preferred over software solution
+	IBRS_PROVIDES_SMP                    // AMD: IBRS provides Same Mode Protection
+	IBS                                  // Instruction Based Sampling (AMD)
+	IBSBRNTRGT                           // Instruction Based Sampling Feature (AMD)
+	IBSFETCHSAM                          // Instruction Based Sampling Feature (AMD)
+	IBSFFV                               // Instruction Based Sampling Feature (AMD)
+	IBSOPCNT                             // Instruction Based Sampling Feature (AMD)
+	IBSOPCNTEXT                          // Instruction Based Sampling Feature (AMD)
+	IBSOPSAM                             // Instruction Based Sampling Feature (AMD)
+	IBSRDWROPCNT                         // Instruction Based Sampling Feature (AMD)
+	IBSRIPINVALIDCHK                     // Instruction Based Sampling Feature (AMD)
+	IBS_FETCH_CTLX                       // AMD: IBS fetch control extended MSR supported
+	IBS_OPDATA4                          // AMD: IBS op data 4 MSR supported
+	IBS_OPFUSE                           // AMD: Indicates support for IbsOpFuse
+	IBS_PREVENTHOST                      // Disallowing IBS use by the host supported
+	IBS_ZEN4                             // AMD: Fetch and Op IBS support IBS extensions added with Zen4
+	IDPRED_CTRL                          // IPRED_DIS
+	INT_WBINVD                           // WBINVD/WBNOINVD are interruptible.
+	INVLPGB                              // NVLPGB and TLBSYNC instruction supported
+	KEYLOCKER                            // Key locker
+	KEYLOCKERW                           // Key locker wide
+	LAHF                                 // LAHF/SAHF in long mode
+	LAM                                  // If set, CPU supports Linear Address Masking
+	LBRVIRT                              // LBR virtualization
+	LZCNT                                // LZCNT instruction
+	MCAOVERFLOW                          // MCA overflow recovery support.
+	MCDT_NO                              // Processor do not exhibit MXCSR Configuration Dependent Timing behavior and do not need to mitigate it.
+	MCOMMIT                              // MCOMMIT instruction supported
+	MD_CLEAR                             // VERW clears CPU buffers
+	MMX                                  // standard MMX
+	MMXEXT                               // SSE integer functions or AMD MMX ext
+	MOVBE                                // MOVBE instruction (big-endian)
+	MOVDIR64B                            // Move 64 Bytes as Direct Store
+	MOVDIRI                              // Move Doubleword as Direct Store
+	MOVSB_ZL                             // Fast Zero-Length MOVSB
+	MOVU                                 // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE	MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD
+	MPX                                  // Intel MPX (Memory Protection Extensions)
+	MSRIRC                               // Instruction Retired Counter MSR available
+	MSRLIST                              // Read/Write List of Model Specific Registers
+	MSR_PAGEFLUSH                        // Page Flush MSR available
+	NRIPS                                // Indicates support for NRIP save on VMEXIT
+	NX                                   // NX (No-Execute) bit
+	OSXSAVE                              // XSAVE enabled by OS
+	PCONFIG                              // PCONFIG for Intel Multi-Key Total Memory Encryption
+	POPCNT                               // POPCNT instruction
+	PPIN                                 // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled
+	PREFETCHI                            // PREFETCHIT0/1 instructions
+	PSFD                                 // Predictive Store Forward Disable
+	RDPRU                                // RDPRU instruction supported
+	RDRAND                               // RDRAND instruction is available
+	RDSEED                               // RDSEED instruction is available
+	RDTSCP                               // RDTSCP Instruction
+	RRSBA_CTRL                           // Restricted RSB Alternate
+	RTM                                  // Restricted Transactional Memory
+	RTM_ALWAYS_ABORT                     // Indicates that the loaded microcode is forcing RTM abort.
+	SBPB                                 // Indicates support for the Selective Branch Predictor Barrier
+	SERIALIZE                            // Serialize Instruction Execution
+	SEV                                  // AMD Secure Encrypted Virtualization supported
+	SEV_64BIT                            // AMD SEV guest execution only allowed from a 64-bit host
+	SEV_ALTERNATIVE                      // AMD SEV Alternate Injection supported
+	SEV_DEBUGSWAP                        // Full debug state swap supported for SEV-ES guests
+	SEV_ES                               // AMD SEV Encrypted State supported
+	SEV_RESTRICTED                       // AMD SEV Restricted Injection supported
+	SEV_SNP                              // AMD SEV Secure Nested Paging supported
+	SGX                                  // Software Guard Extensions
+	SGXLC                                // Software Guard Extensions Launch Control
+	SHA                                  // Intel SHA Extensions
+	SME                                  // AMD Secure Memory Encryption supported
+	SME_COHERENT                         // AMD Hardware cache coherency across encryption domains enforced
+	SPEC_CTRL_SSBD                       // Speculative Store Bypass Disable
+	SRBDS_CTRL                           // SRBDS mitigation MSR available
+	SRSO_MSR_FIX                         // Indicates that software may use MSR BP_CFG[BpSpecReduce] to mitigate SRSO.
+	SRSO_NO                              // Indicates the CPU is not subject to the SRSO vulnerability
+	SRSO_USER_KERNEL_NO                  // Indicates the CPU is not subject to the SRSO vulnerability across user/kernel boundaries
+	SSE                                  // SSE functions
+	SSE2                                 // P4 SSE functions
+	SSE3                                 // Prescott SSE3 functions
+	SSE4                                 // Penryn SSE4.1 functions
+	SSE42                                // Nehalem SSE4.2 functions
+	SSE4A                                // AMD Barcelona microarchitecture SSE4a instructions
+	SSSE3                                // Conroe SSSE3 functions
+	STIBP                                // Single Thread Indirect Branch Predictors
+	STIBP_ALWAYSON                       // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On
+	STOSB_SHORT                          // Fast short STOSB
+	SUCCOR                               // Software uncorrectable error containment and recovery capability.
+	SVM                                  // AMD Secure Virtual Machine
+	SVMDA                                // Indicates support for the SVM decode assists.
+	SVMFBASID                            // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control
+	SVML                                 // AMD SVM lock. Indicates support for SVM-Lock.
+	SVMNP                                // AMD SVM nested paging
+	SVMPF                                // SVM pause intercept filter. Indicates support for the pause intercept filter
+	SVMPFT                               // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold
+	SYSCALL                              // System-Call Extension (SCE): SYSCALL and SYSRET instructions.
+	SYSEE                                // SYSENTER and SYSEXIT instructions
+	TBM                                  // AMD Trailing Bit Manipulation
+	TDX_GUEST                            // Intel Trust Domain Extensions Guest
+	TLB_FLUSH_NESTED                     // AMD: Flushing includes all the nested translations for guest translations
+	TME                                  // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.
+	TOPEXT                               // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
+	TSCRATEMSR                           // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104
+	TSXLDTRK                             // Intel TSX Suspend Load Address Tracking
+	VAES                                 // Vector AES. AVX(512) versions requires additional checks.
+	VMCBCLEAN                            // VMCB clean bits. Indicates support for VMCB clean bits.
+	VMPL                                 // AMD VM Permission Levels supported
+	VMSA_REGPROT                         // AMD VMSA Register Protection supported
+	VMX                                  // Virtual Machine Extensions
+	VPCLMULQDQ                           // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions.
+	VTE                                  // AMD Virtual Transparent Encryption supported
+	WAITPKG                              // TPAUSE, UMONITOR, UMWAIT
+	WBNOINVD                             // Write Back and Do Not Invalidate Cache
+	WRMSRNS                              // Non-Serializing Write to Model Specific Register
+	X87                                  // FPU
+	XGETBV1                              // Supports XGETBV with ECX = 1
+	XOP                                  // Bulldozer XOP functions
+	XSAVE                                // XSAVE, XRESTOR, XSETBV, XGETBV
+	XSAVEC                               // Supports XSAVEC and the compacted form of XRSTOR.
+	XSAVEOPT                             // XSAVEOPT available
+	XSAVES                               // Supports XSAVES/XRSTORS and IA32_XSS

 	// ARM features:
 	AESARM   // AES instructions
@ -302,9 +314,11 @@ type CPUInfo struct {
 		L2  int // L2 Cache (per core or shared). Will be -1 if undetected
 		L3  int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected
 	}
-	SGX       SGXSupport
-	maxFunc   uint32
-	maxExFunc uint32
+	SGX              SGXSupport
+	AMDMemEncryption AMDMemEncryptionSupport
+	AVX10Level       uint8
+	maxFunc          uint32
+	maxExFunc        uint32
 }

 var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
@ -1071,6 +1085,32 @@ func hasSGX(available, lc bool) (rval SGXSupport) {
 	return
 }

+type AMDMemEncryptionSupport struct {
+	Available          bool
+	CBitPossition      uint32
+	NumVMPL            uint32
+	PhysAddrReduction  uint32
+	NumEntryptedGuests uint32
+	MinSevNoEsAsid     uint32
+}
+
+func hasAMDMemEncryption(available bool) (rval AMDMemEncryptionSupport) {
+	rval.Available = available
+	if !available {
+		return
+	}
+
+	_, b, c, d := cpuidex(0x8000001f, 0)
+
+	rval.CBitPossition = b & 0x3f
+	rval.PhysAddrReduction = (b >> 6) & 0x3F
+	rval.NumVMPL = (b >> 12) & 0xf
+	rval.NumEntryptedGuests = c
+	rval.MinSevNoEsAsid = d
+
+	return
+}
+
 func support() flagSet {
 	var fs flagSet
 	mfi := maxFunctionID()
@ -1165,6 +1205,7 @@ func support() flagSet {
 		fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ)
 		fs.setIf(ecx&(1<<13) != 0, TME)
 		fs.setIf(ecx&(1<<25) != 0, CLDEMOTE)
+		fs.setIf(ecx&(1<<23) != 0, KEYLOCKER)
 		fs.setIf(ecx&(1<<27) != 0, MOVDIRI)
 		fs.setIf(ecx&(1<<28) != 0, MOVDIR64B)
 		fs.setIf(ecx&(1<<29) != 0, ENQCMD)
@ -1202,6 +1243,8 @@ func support() flagSet {
 		fs.setIf(edx1&(1<<4) != 0, AVXVNNIINT8)
 		fs.setIf(edx1&(1<<5) != 0, AVXNECONVERT)
 		fs.setIf(edx1&(1<<14) != 0, PREFETCHI)
+		fs.setIf(edx1&(1<<19) != 0, AVX10)
+		fs.setIf(edx1&(1<<21) != 0, APX_F)

 		// Only detect AVX-512 features if XGETBV is supported
 		if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
@ -1252,6 +1295,19 @@ func support() flagSet {
 		fs.setIf(edx&(1<<4) != 0, BHI_CTRL)
 		fs.setIf(edx&(1<<5) != 0, MCDT_NO)

+		// Add keylocker features.
+		if fs.inSet(KEYLOCKER) && mfi >= 0x19 {
+			_, ebx, _, _ := cpuidex(0x19, 0)
+			fs.setIf(ebx&5 == 5, KEYLOCKERW) // Bit 0 and 2 (1+4)
+		}
+
+		// Add AVX10 features.
+		if fs.inSet(AVX10) && mfi >= 0x24 {
+			_, ebx, _, _ := cpuidex(0x24, 0)
+			fs.setIf(ebx&(1<<16) != 0, AVX10_128)
+			fs.setIf(ebx&(1<<17) != 0, AVX10_256)
+			fs.setIf(ebx&(1<<18) != 0, AVX10_512)
+		}
 	}

 	// Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1)
@ -1394,6 +1450,29 @@ func support() flagSet {
 		fs.setIf((a>>24)&1 == 1, VMSA_REGPROT)
 	}

+	if maxExtendedFunction() >= 0x80000021 && vend == AMD {
+		a, _, _, _ := cpuid(0x80000021)
+		fs.setIf((a>>31)&1 == 1, SRSO_MSR_FIX)
+		fs.setIf((a>>30)&1 == 1, SRSO_USER_KERNEL_NO)
+		fs.setIf((a>>29)&1 == 1, SRSO_NO)
+		fs.setIf((a>>28)&1 == 1, IBPB_BRTYPE)
+		fs.setIf((a>>27)&1 == 1, SBPB)
+	}
+
+	if mfi >= 0x20 {
+		// Microsoft has decided to purposefully hide the information
+		// of the guest TEE when VMs are being created using Hyper-V.
+		//
+		// This leads us to check for the Hyper-V cpuid features
+		// (0x4000000C), and then for the `ebx` value set.
+		//
+		// For Intel TDX, `ebx` is set as `0xbe3`, being 3 the part
+		// we're mostly interested about,according to:
+		// https://github.com/torvalds/linux/blob/d2f51b3516dade79269ff45eae2a7668ae711b25/arch/x86/include/asm/hyperv-tlfs.h#L169-L174
+		_, ebx, _, _ := cpuid(0x4000000C)
+		fs.setIf(ebx == 0xbe3, TDX_GUEST)
+	}
+
 	if mfi >= 0x21 {
 		// Intel Trusted Domain Extensions Guests have their own cpuid leaf (0x21).
 		_, ebx, ecx, edx := cpuid(0x21)
@ -1404,6 +1483,14 @@ func support() flagSet {
 	return fs
 }

+func (c *CPUInfo) supportAVX10() uint8 {
+	if c.maxFunc >= 0x24 && c.featureSet.inSet(AVX10) {
+		_, ebx, _, _ := cpuidex(0x24, 0)
+		return uint8(ebx)
+	}
+	return 0
+}
+
 func valAsString(values ...uint32) []byte {
 	r := make([]byte, 4*len(values))
 	for i, v := range values {
--- a/vendor/github.com/klauspost/cpuid/v2/detect_x86.go
+++ b/vendor/github.com/klauspost/cpuid/v2/detect_x86.go
@ -27,10 +27,12 @@ func addInfo(c *CPUInfo, safe bool) {
 	c.Family, c.Model, c.Stepping = familyModel()
 	c.featureSet = support()
 	c.SGX = hasSGX(c.featureSet.inSet(SGX), c.featureSet.inSet(SGXLC))
+	c.AMDMemEncryption = hasAMDMemEncryption(c.featureSet.inSet(SME) || c.featureSet.inSet(SEV))
 	c.ThreadsPerCore = threadsPerCore()
 	c.LogicalCores = logicalCores()
 	c.PhysicalCores = physicalCores()
 	c.VendorID, c.VendorString = vendorID()
+	c.AVX10Level = c.supportAVX10()
 	c.cacheSize()
 	c.frequencies()
 }
--- a/vendor/github.com/klauspost/cpuid/v2/featureid_string.go
+++ b/vendor/github.com/klauspost/cpuid/v2/featureid_string.go
@ -16,210 +16,222 @@ func _() {
 	_ = x[AMXFP16-6]
 	_ = x[AMXINT8-7]
 	_ = x[AMXTILE-8]
-	_ = x[AVX-9]
-	_ = x[AVX2-10]
-	_ = x[AVX512BF16-11]
-	_ = x[AVX512BITALG-12]
-	_ = x[AVX512BW-13]
-	_ = x[AVX512CD-14]
-	_ = x[AVX512DQ-15]
-	_ = x[AVX512ER-16]
-	_ = x[AVX512F-17]
-	_ = x[AVX512FP16-18]
-	_ = x[AVX512IFMA-19]
-	_ = x[AVX512PF-20]
-	_ = x[AVX512VBMI-21]
-	_ = x[AVX512VBMI2-22]
-	_ = x[AVX512VL-23]
-	_ = x[AVX512VNNI-24]
-	_ = x[AVX512VP2INTERSECT-25]
-	_ = x[AVX512VPOPCNTDQ-26]
-	_ = x[AVXIFMA-27]
-	_ = x[AVXNECONVERT-28]
-	_ = x[AVXSLOW-29]
-	_ = x[AVXVNNI-30]
-	_ = x[AVXVNNIINT8-31]
-	_ = x[BHI_CTRL-32]
-	_ = x[BMI1-33]
-	_ = x[BMI2-34]
-	_ = x[CETIBT-35]
-	_ = x[CETSS-36]
-	_ = x[CLDEMOTE-37]
-	_ = x[CLMUL-38]
-	_ = x[CLZERO-39]
-	_ = x[CMOV-40]
-	_ = x[CMPCCXADD-41]
-	_ = x[CMPSB_SCADBS_SHORT-42]
-	_ = x[CMPXCHG8-43]
-	_ = x[CPBOOST-44]
-	_ = x[CPPC-45]
-	_ = x[CX16-46]
-	_ = x[EFER_LMSLE_UNS-47]
-	_ = x[ENQCMD-48]
-	_ = x[ERMS-49]
-	_ = x[F16C-50]
-	_ = x[FLUSH_L1D-51]
-	_ = x[FMA3-52]
-	_ = x[FMA4-53]
-	_ = x[FP128-54]
-	_ = x[FP256-55]
-	_ = x[FSRM-56]
-	_ = x[FXSR-57]
-	_ = x[FXSROPT-58]
-	_ = x[GFNI-59]
-	_ = x[HLE-60]
-	_ = x[HRESET-61]
-	_ = x[HTT-62]
-	_ = x[HWA-63]
-	_ = x[HYBRID_CPU-64]
-	_ = x[HYPERVISOR-65]
-	_ = x[IA32_ARCH_CAP-66]
-	_ = x[IA32_CORE_CAP-67]
-	_ = x[IBPB-68]
-	_ = x[IBRS-69]
-	_ = x[IBRS_PREFERRED-70]
-	_ = x[IBRS_PROVIDES_SMP-71]
-	_ = x[IBS-72]
-	_ = x[IBSBRNTRGT-73]
-	_ = x[IBSFETCHSAM-74]
-	_ = x[IBSFFV-75]
-	_ = x[IBSOPCNT-76]
-	_ = x[IBSOPCNTEXT-77]
-	_ = x[IBSOPSAM-78]
-	_ = x[IBSRDWROPCNT-79]
-	_ = x[IBSRIPINVALIDCHK-80]
-	_ = x[IBS_FETCH_CTLX-81]
-	_ = x[IBS_OPDATA4-82]
-	_ = x[IBS_OPFUSE-83]
-	_ = x[IBS_PREVENTHOST-84]
-	_ = x[IBS_ZEN4-85]
-	_ = x[IDPRED_CTRL-86]
-	_ = x[INT_WBINVD-87]
-	_ = x[INVLPGB-88]
-	_ = x[LAHF-89]
-	_ = x[LAM-90]
-	_ = x[LBRVIRT-91]
-	_ = x[LZCNT-92]
-	_ = x[MCAOVERFLOW-93]
-	_ = x[MCDT_NO-94]
-	_ = x[MCOMMIT-95]
-	_ = x[MD_CLEAR-96]
-	_ = x[MMX-97]
-	_ = x[MMXEXT-98]
-	_ = x[MOVBE-99]
-	_ = x[MOVDIR64B-100]
-	_ = x[MOVDIRI-101]
-	_ = x[MOVSB_ZL-102]
-	_ = x[MOVU-103]
-	_ = x[MPX-104]
-	_ = x[MSRIRC-105]
-	_ = x[MSRLIST-106]
-	_ = x[MSR_PAGEFLUSH-107]
-	_ = x[NRIPS-108]
-	_ = x[NX-109]
-	_ = x[OSXSAVE-110]
-	_ = x[PCONFIG-111]
-	_ = x[POPCNT-112]
-	_ = x[PPIN-113]
-	_ = x[PREFETCHI-114]
-	_ = x[PSFD-115]
-	_ = x[RDPRU-116]
-	_ = x[RDRAND-117]
-	_ = x[RDSEED-118]
-	_ = x[RDTSCP-119]
-	_ = x[RRSBA_CTRL-120]
-	_ = x[RTM-121]
-	_ = x[RTM_ALWAYS_ABORT-122]
-	_ = x[SERIALIZE-123]
-	_ = x[SEV-124]
-	_ = x[SEV_64BIT-125]
-	_ = x[SEV_ALTERNATIVE-126]
-	_ = x[SEV_DEBUGSWAP-127]
-	_ = x[SEV_ES-128]
-	_ = x[SEV_RESTRICTED-129]
-	_ = x[SEV_SNP-130]
-	_ = x[SGX-131]
-	_ = x[SGXLC-132]
-	_ = x[SHA-133]
-	_ = x[SME-134]
-	_ = x[SME_COHERENT-135]
-	_ = x[SPEC_CTRL_SSBD-136]
-	_ = x[SRBDS_CTRL-137]
-	_ = x[SSE-138]
-	_ = x[SSE2-139]
-	_ = x[SSE3-140]
-	_ = x[SSE4-141]
-	_ = x[SSE42-142]
-	_ = x[SSE4A-143]
-	_ = x[SSSE3-144]
-	_ = x[STIBP-145]
-	_ = x[STIBP_ALWAYSON-146]
-	_ = x[STOSB_SHORT-147]
-	_ = x[SUCCOR-148]
-	_ = x[SVM-149]
-	_ = x[SVMDA-150]
-	_ = x[SVMFBASID-151]
-	_ = x[SVML-152]
-	_ = x[SVMNP-153]
-	_ = x[SVMPF-154]
-	_ = x[SVMPFT-155]
-	_ = x[SYSCALL-156]
-	_ = x[SYSEE-157]
-	_ = x[TBM-158]
-	_ = x[TDX_GUEST-159]
-	_ = x[TLB_FLUSH_NESTED-160]
-	_ = x[TME-161]
-	_ = x[TOPEXT-162]
-	_ = x[TSCRATEMSR-163]
-	_ = x[TSXLDTRK-164]
-	_ = x[VAES-165]
-	_ = x[VMCBCLEAN-166]
-	_ = x[VMPL-167]
-	_ = x[VMSA_REGPROT-168]
-	_ = x[VMX-169]
-	_ = x[VPCLMULQDQ-170]
-	_ = x[VTE-171]
-	_ = x[WAITPKG-172]
-	_ = x[WBNOINVD-173]
-	_ = x[WRMSRNS-174]
-	_ = x[X87-175]
-	_ = x[XGETBV1-176]
-	_ = x[XOP-177]
-	_ = x[XSAVE-178]
-	_ = x[XSAVEC-179]
-	_ = x[XSAVEOPT-180]
-	_ = x[XSAVES-181]
-	_ = x[AESARM-182]
-	_ = x[ARMCPUID-183]
-	_ = x[ASIMD-184]
-	_ = x[ASIMDDP-185]
-	_ = x[ASIMDHP-186]
-	_ = x[ASIMDRDM-187]
-	_ = x[ATOMICS-188]
-	_ = x[CRC32-189]
-	_ = x[DCPOP-190]
-	_ = x[EVTSTRM-191]
-	_ = x[FCMA-192]
-	_ = x[FP-193]
-	_ = x[FPHP-194]
-	_ = x[GPA-195]
-	_ = x[JSCVT-196]
-	_ = x[LRCPC-197]
-	_ = x[PMULL-198]
-	_ = x[SHA1-199]
-	_ = x[SHA2-200]
-	_ = x[SHA3-201]
-	_ = x[SHA512-202]
-	_ = x[SM3-203]
-	_ = x[SM4-204]
-	_ = x[SVE-205]
-	_ = x[lastID-206]
+	_ = x[APX_F-9]
+	_ = x[AVX-10]
+	_ = x[AVX10-11]
+	_ = x[AVX10_128-12]
+	_ = x[AVX10_256-13]
+	_ = x[AVX10_512-14]
+	_ = x[AVX2-15]
+	_ = x[AVX512BF16-16]
+	_ = x[AVX512BITALG-17]
+	_ = x[AVX512BW-18]
+	_ = x[AVX512CD-19]
+	_ = x[AVX512DQ-20]
+	_ = x[AVX512ER-21]
+	_ = x[AVX512F-22]
+	_ = x[AVX512FP16-23]
+	_ = x[AVX512IFMA-24]
+	_ = x[AVX512PF-25]
+	_ = x[AVX512VBMI-26]
+	_ = x[AVX512VBMI2-27]
+	_ = x[AVX512VL-28]
+	_ = x[AVX512VNNI-29]
+	_ = x[AVX512VP2INTERSECT-30]
+	_ = x[AVX512VPOPCNTDQ-31]
+	_ = x[AVXIFMA-32]
+	_ = x[AVXNECONVERT-33]
+	_ = x[AVXSLOW-34]
+	_ = x[AVXVNNI-35]
+	_ = x[AVXVNNIINT8-36]
+	_ = x[BHI_CTRL-37]
+	_ = x[BMI1-38]
+	_ = x[BMI2-39]
+	_ = x[CETIBT-40]
+	_ = x[CETSS-41]
+	_ = x[CLDEMOTE-42]
+	_ = x[CLMUL-43]
+	_ = x[CLZERO-44]
+	_ = x[CMOV-45]
+	_ = x[CMPCCXADD-46]
+	_ = x[CMPSB_SCADBS_SHORT-47]
+	_ = x[CMPXCHG8-48]
+	_ = x[CPBOOST-49]
+	_ = x[CPPC-50]
+	_ = x[CX16-51]
+	_ = x[EFER_LMSLE_UNS-52]
+	_ = x[ENQCMD-53]
+	_ = x[ERMS-54]
+	_ = x[F16C-55]
+	_ = x[FLUSH_L1D-56]
+	_ = x[FMA3-57]
+	_ = x[FMA4-58]
+	_ = x[FP128-59]
+	_ = x[FP256-60]
+	_ = x[FSRM-61]
+	_ = x[FXSR-62]
+	_ = x[FXSROPT-63]
+	_ = x[GFNI-64]
+	_ = x[HLE-65]
+	_ = x[HRESET-66]
+	_ = x[HTT-67]
+	_ = x[HWA-68]
+	_ = x[HYBRID_CPU-69]
+	_ = x[HYPERVISOR-70]
+	_ = x[IA32_ARCH_CAP-71]
+	_ = x[IA32_CORE_CAP-72]
+	_ = x[IBPB-73]
+	_ = x[IBPB_BRTYPE-74]
+	_ = x[IBRS-75]
+	_ = x[IBRS_PREFERRED-76]
+	_ = x[IBRS_PROVIDES_SMP-77]
+	_ = x[IBS-78]
+	_ = x[IBSBRNTRGT-79]
+	_ = x[IBSFETCHSAM-80]
+	_ = x[IBSFFV-81]
+	_ = x[IBSOPCNT-82]
+	_ = x[IBSOPCNTEXT-83]
+	_ = x[IBSOPSAM-84]
+	_ = x[IBSRDWROPCNT-85]
+	_ = x[IBSRIPINVALIDCHK-86]
+	_ = x[IBS_FETCH_CTLX-87]
+	_ = x[IBS_OPDATA4-88]
+	_ = x[IBS_OPFUSE-89]
+	_ = x[IBS_PREVENTHOST-90]
+	_ = x[IBS_ZEN4-91]
+	_ = x[IDPRED_CTRL-92]
+	_ = x[INT_WBINVD-93]
+	_ = x[INVLPGB-94]
+	_ = x[KEYLOCKER-95]
+	_ = x[KEYLOCKERW-96]
+	_ = x[LAHF-97]
+	_ = x[LAM-98]
+	_ = x[LBRVIRT-99]
+	_ = x[LZCNT-100]
+	_ = x[MCAOVERFLOW-101]
+	_ = x[MCDT_NO-102]
+	_ = x[MCOMMIT-103]
+	_ = x[MD_CLEAR-104]
+	_ = x[MMX-105]
+	_ = x[MMXEXT-106]
+	_ = x[MOVBE-107]
+	_ = x[MOVDIR64B-108]
+	_ = x[MOVDIRI-109]
+	_ = x[MOVSB_ZL-110]
+	_ = x[MOVU-111]
+	_ = x[MPX-112]
+	_ = x[MSRIRC-113]
+	_ = x[MSRLIST-114]
+	_ = x[MSR_PAGEFLUSH-115]
+	_ = x[NRIPS-116]
+	_ = x[NX-117]
+	_ = x[OSXSAVE-118]
+	_ = x[PCONFIG-119]
+	_ = x[POPCNT-120]
+	_ = x[PPIN-121]
+	_ = x[PREFETCHI-122]
+	_ = x[PSFD-123]
+	_ = x[RDPRU-124]
+	_ = x[RDRAND-125]
+	_ = x[RDSEED-126]
+	_ = x[RDTSCP-127]
+	_ = x[RRSBA_CTRL-128]
+	_ = x[RTM-129]
+	_ = x[RTM_ALWAYS_ABORT-130]
+	_ = x[SBPB-131]
+	_ = x[SERIALIZE-132]
+	_ = x[SEV-133]
+	_ = x[SEV_64BIT-134]
+	_ = x[SEV_ALTERNATIVE-135]
+	_ = x[SEV_DEBUGSWAP-136]
+	_ = x[SEV_ES-137]
+	_ = x[SEV_RESTRICTED-138]
+	_ = x[SEV_SNP-139]
+	_ = x[SGX-140]
+	_ = x[SGXLC-141]
+	_ = x[SHA-142]
+	_ = x[SME-143]
+	_ = x[SME_COHERENT-144]
+	_ = x[SPEC_CTRL_SSBD-145]
+	_ = x[SRBDS_CTRL-146]
+	_ = x[SRSO_MSR_FIX-147]
+	_ = x[SRSO_NO-148]
+	_ = x[SRSO_USER_KERNEL_NO-149]
+	_ = x[SSE-150]
+	_ = x[SSE2-151]
+	_ = x[SSE3-152]
+	_ = x[SSE4-153]
+	_ = x[SSE42-154]
+	_ = x[SSE4A-155]
+	_ = x[SSSE3-156]
+	_ = x[STIBP-157]
+	_ = x[STIBP_ALWAYSON-158]
+	_ = x[STOSB_SHORT-159]
+	_ = x[SUCCOR-160]
+	_ = x[SVM-161]
+	_ = x[SVMDA-162]
+	_ = x[SVMFBASID-163]
+	_ = x[SVML-164]
+	_ = x[SVMNP-165]
+	_ = x[SVMPF-166]
+	_ = x[SVMPFT-167]
+	_ = x[SYSCALL-168]
+	_ = x[SYSEE-169]
+	_ = x[TBM-170]
+	_ = x[TDX_GUEST-171]
+	_ = x[TLB_FLUSH_NESTED-172]
+	_ = x[TME-173]
+	_ = x[TOPEXT-174]
+	_ = x[TSCRATEMSR-175]
+	_ = x[TSXLDTRK-176]
+	_ = x[VAES-177]
+	_ = x[VMCBCLEAN-178]
+	_ = x[VMPL-179]
+	_ = x[VMSA_REGPROT-180]
+	_ = x[VMX-181]
+	_ = x[VPCLMULQDQ-182]
+	_ = x[VTE-183]
+	_ = x[WAITPKG-184]
+	_ = x[WBNOINVD-185]
+	_ = x[WRMSRNS-186]
+	_ = x[X87-187]
+	_ = x[XGETBV1-188]
+	_ = x[XOP-189]
+	_ = x[XSAVE-190]
+	_ = x[XSAVEC-191]
+	_ = x[XSAVEOPT-192]
+	_ = x[XSAVES-193]
+	_ = x[AESARM-194]
+	_ = x[ARMCPUID-195]
+	_ = x[ASIMD-196]
+	_ = x[ASIMDDP-197]
+	_ = x[ASIMDHP-198]
+	_ = x[ASIMDRDM-199]
+	_ = x[ATOMICS-200]
+	_ = x[CRC32-201]
+	_ = x[DCPOP-202]
+	_ = x[EVTSTRM-203]
+	_ = x[FCMA-204]
+	_ = x[FP-205]
+	_ = x[FPHP-206]
+	_ = x[GPA-207]
+	_ = x[JSCVT-208]
+	_ = x[LRCPC-209]
+	_ = x[PMULL-210]
+	_ = x[SHA1-211]
+	_ = x[SHA2-212]
+	_ = x[SHA3-213]
+	_ = x[SHA512-214]
+	_ = x[SM3-215]
+	_ = x[SM4-216]
+	_ = x[SVE-217]
+	_ = x[lastID-218]
 	_ = x[firstID-0]
 }

-const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXFP16AMXINT8AMXTILEAVXAVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXIFMAAVXNECONVERTAVXSLOWAVXVNNIAVXVNNIINT8BHI_CTRLBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPCCXADDCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCPPCCX16EFER_LMSLE_UNSENQCMDERMSF16CFLUSH_L1DFMA3FMA4FP128FP256FSRMFXSRFXSROPTGFNIHLEHRESETHTTHWAHYBRID_CPUHYPERVISORIA32_ARCH_CAPIA32_CORE_CAPIBPBIBRSIBRS_PREFERREDIBRS_PROVIDES_SMPIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_FETCH_CTLXIBS_OPDATA4IBS_OPFUSEIBS_PREVENTHOSTIBS_ZEN4IDPRED_CTRLINT_WBINVDINVLPGBLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCDT_NOMCOMMITMD_CLEARMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMOVUMPXMSRIRCMSRLISTMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTPPINPREFETCHIPSFDRDPRURDRANDRDSEEDRDTSCPRRSBA_CTRLRTMRTM_ALWAYS_ABORTSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSPEC_CTRL_SSBDSRBDS_CTRLSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTIBP_ALWAYSONSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTDX_GUESTTLB_FLUSH_NESTEDTMETOPEXTTSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDWRMSRNSX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID"
+const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXFP16AMXINT8AMXTILEAPX_FAVXAVX10AVX10_128AVX10_256AVX10_512AVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXIFMAAVXNECONVERTAVXSLOWAVXVNNIAVXVNNIINT8BHI_CTRLBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPCCXADDCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCPPCCX16EFER_LMSLE_UNSENQCMDERMSF16CFLUSH_L1DFMA3FMA4FP128FP256FSRMFXSRFXSROPTGFNIHLEHRESETHTTHWAHYBRID_CPUHYPERVISORIA32_ARCH_CAPIA32_CORE_CAPIBPBIBPB_BRTYPEIBRSIBRS_PREFERREDIBRS_PROVIDES_SMPIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_FETCH_CTLXIBS_OPDATA4IBS_OPFUSEIBS_PREVENTHOSTIBS_ZEN4IDPRED_CTRLINT_WBINVDINVLPGBKEYLOCKERKEYLOCKERWLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCDT_NOMCOMMITMD_CLEARMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMOVUMPXMSRIRCMSRLISTMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTPPINPREFETCHIPSFDRDPRURDRANDRDSEEDRDTSCPRRSBA_CTRLRTMRTM_ALWAYS_ABORTSBPBSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSPEC_CTRL_SSBDSRBDS_CTRLSRSO_MSR_FIXSRSO_NOSRSO_USER_KERNEL_NOSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTIBP_ALWAYSONSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTDX_GUESTTLB_FLUSH_NESTEDTMETOPEXTTSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDWRMSRNSX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID"

-var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 62, 65, 69, 79, 91, 99, 107, 115, 123, 130, 140, 150, 158, 168, 179, 187, 197, 215, 230, 237, 249, 256, 263, 274, 282, 286, 290, 296, 301, 309, 314, 320, 324, 333, 351, 359, 366, 370, 374, 388, 394, 398, 402, 411, 415, 419, 424, 429, 433, 437, 444, 448, 451, 457, 460, 463, 473, 483, 496, 509, 513, 517, 531, 548, 551, 561, 572, 578, 586, 597, 605, 617, 633, 647, 658, 668, 683, 691, 702, 712, 719, 723, 726, 733, 738, 749, 756, 763, 771, 774, 780, 785, 794, 801, 809, 813, 816, 822, 829, 842, 847, 849, 856, 863, 869, 873, 882, 886, 891, 897, 903, 909, 919, 922, 938, 947, 950, 959, 974, 987, 993, 1007, 1014, 1017, 1022, 1025, 1028, 1040, 1054, 1064, 1067, 1071, 1075, 1079, 1084, 1089, 1094, 1099, 1113, 1124, 1130, 1133, 1138, 1147, 1151, 1156, 1161, 1167, 1174, 1179, 1182, 1191, 1207, 1210, 1216, 1226, 1234, 1238, 1247, 1251, 1263, 1266, 1276, 1279, 1286, 1294, 1301, 1304, 1311, 1314, 1319, 1325, 1333, 1339, 1345, 1353, 1358, 1365, 1372, 1380, 1387, 1392, 1397, 1404, 1408, 1410, 1414, 1417, 1422, 1427, 1432, 1436, 1440, 1444, 1450, 1453, 1456, 1459, 1465}
+var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 62, 67, 70, 75, 84, 93, 102, 106, 116, 128, 136, 144, 152, 160, 167, 177, 187, 195, 205, 216, 224, 234, 252, 267, 274, 286, 293, 300, 311, 319, 323, 327, 333, 338, 346, 351, 357, 361, 370, 388, 396, 403, 407, 411, 425, 431, 435, 439, 448, 452, 456, 461, 466, 470, 474, 481, 485, 488, 494, 497, 500, 510, 520, 533, 546, 550, 561, 565, 579, 596, 599, 609, 620, 626, 634, 645, 653, 665, 681, 695, 706, 716, 731, 739, 750, 760, 767, 776, 786, 790, 793, 800, 805, 816, 823, 830, 838, 841, 847, 852, 861, 868, 876, 880, 883, 889, 896, 909, 914, 916, 923, 930, 936, 940, 949, 953, 958, 964, 970, 976, 986, 989, 1005, 1009, 1018, 1021, 1030, 1045, 1058, 1064, 1078, 1085, 1088, 1093, 1096, 1099, 1111, 1125, 1135, 1147, 1154, 1173, 1176, 1180, 1184, 1188, 1193, 1198, 1203, 1208, 1222, 1233, 1239, 1242, 1247, 1256, 1260, 1265, 1270, 1276, 1283, 1288, 1291, 1300, 1316, 1319, 1325, 1335, 1343, 1347, 1356, 1360, 1372, 1375, 1385, 1388, 1395, 1403, 1410, 1413, 1420, 1423, 1428, 1434, 1442, 1448, 1454, 1462, 1467, 1474, 1481, 1489, 1496, 1501, 1506, 1513, 1517, 1519, 1523, 1526, 1531, 1536, 1541, 1545, 1549, 1553, 1559, 1562, 1565, 1568, 1574}

 func (i FeatureID) String() string {
 	if i < 0 || i >= FeatureID(len(_FeatureID_index)-1) {
--- a/vendor/github.com/klauspost/reedsolomon/.gitignore
+++ b/vendor/github.com/klauspost/reedsolomon/.gitignore
@ -0,0 +1,26 @@
+# Compiled Object files, Static and Dynamic libs (Shared Objects)
+*.o
+*.a
+*.so
+
+# Folders
+_obj
+_test
+
+# Architecture specific extensions/prefixes
+*.[568vq]
+[568vq].out
+
+*.cgo1.go
+*.cgo2.c
+_cgo_defun.c
+_cgo_gotypes.go
+_cgo_export.*
+
+_testmain.go
+
+*.exe
+*.test
+*.prof
+
+.idea
--- a/vendor/github.com/klauspost/reedsolomon/LICENSE
+++ b/vendor/github.com/klauspost/reedsolomon/LICENSE
@ -0,0 +1,23 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Klaus Post
+Copyright (c) 2015 Backblaze
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
--- a/vendor/github.com/klauspost/reedsolomon/README.md
+++ b/vendor/github.com/klauspost/reedsolomon/README.md
@ -0,0 +1,566 @@
+# Reed-Solomon
+[![Go Reference](https://pkg.go.dev/badge/github.com/klauspost/reedsolomon.svg)](https://pkg.go.dev/github.com/klauspost/reedsolomon) [![Go](https://github.com/klauspost/reedsolomon/actions/workflows/go.yml/badge.svg)](https://github.com/klauspost/reedsolomon/actions/workflows/go.yml)
+
+Reed-Solomon Erasure Coding in Go, with speeds exceeding 1GB/s/cpu core implemented in pure Go.
+
+This is a Go port of the [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon) library released by 
+[Backblaze](http://backblaze.com), with some additional optimizations.
+
+For an introduction on erasure coding, see the post on the [Backblaze blog](https://www.backblaze.com/blog/reed-solomon/).
+
+For encoding high shard counts (>256) a Leopard implementation is used.
+For most platforms this performs close to the original Leopard implementation in terms of speed. 
+
+Package home: https://github.com/klauspost/reedsolomon
+
+Godoc: https://pkg.go.dev/github.com/klauspost/reedsolomon
+
+# Installation
+To get the package use the standard:
+```bash
+go get -u github.com/klauspost/reedsolomon
+```
+
+Using Go modules is recommended.
+
+# Changes
+
+## 2022
+
+* [GFNI](https://github.com/klauspost/reedsolomon/pull/224) support for amd64, for up to 3x faster processing.
+* [Leopard GF8](https://github.com/klauspost/reedsolomon#leopard-gf8) mode added, for faster processing of medium shard counts.
+* [Leopard GF16](https://github.com/klauspost/reedsolomon#leopard-compatible-gf16) mode added, for up to 65536 shards. 
+* [WithJerasureMatrix](https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc#WithJerasureMatrix) allows constructing a [Jerasure](https://github.com/tsuraan/Jerasure) compatible matrix.
+
+## 2021
+
+* Use `GOAMD64=v4` to enable faster AVX2.
+* Add progressive shard encoding.
+* Wider AVX2 loops
+* Limit concurrency on AVX2, since we are likely memory bound.
+* Allow 0 parity shards.
+* Allow disabling inversion cache.
+* Faster AVX2 encoding.
+
+<details>
+	<summary>See older changes</summary>
+
+## May 2020
+
+* ARM64 optimizations, up to 2.5x faster.
+* Added [WithFastOneParityMatrix](https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc#WithFastOneParityMatrix) for faster operation with 1 parity shard.
+* Much better performance when using a limited number of goroutines.
+* AVX512 is now using multiple cores.
+* Stream processing overhaul, big speedups in most cases.
+* AVX512 optimizations
+
+## March 6, 2019
+
+The pure Go implementation is about 30% faster. Minor tweaks to assembler implementations.
+
+## February 8, 2019
+
+AVX512 accelerated version added for Intel Skylake CPUs. This can give up to a 4x speed improvement as compared to AVX2.
+See [here](https://github.com/klauspost/reedsolomon#performance-on-avx512) for more details.
+
+## December 18, 2018
+
+Assembly code for ppc64le has been contributed, this boosts performance by about 10x on this platform.
+
+## November 18, 2017
+
+Added [WithAutoGoroutines](https://godoc.org/github.com/klauspost/reedsolomon#WithAutoGoroutines) which will attempt 
+to calculate the optimal number of goroutines to use based on your expected shard size and detected CPU.
+
+## October 1, 2017
+
+* [Cauchy Matrix](https://godoc.org/github.com/klauspost/reedsolomon#WithCauchyMatrix) is now an option. 
+Thanks to [templexxx](https://github.com/templexxx) for the basis of this.
+
+* Default maximum number of [goroutines](https://godoc.org/github.com/klauspost/reedsolomon#WithMaxGoroutines) 
+has been increased for better multi-core scaling.
+
+* After several requests the Reconstruct and ReconstructData now slices of zero length but sufficient capacity to 
+be used instead of allocating new memory.
+
+## August 26, 2017
+
+*  The [`Encoder()`](https://godoc.org/github.com/klauspost/reedsolomon#Encoder) now contains an `Update` 
+function contributed by [chenzhongtao](https://github.com/chenzhongtao).
+
+* [Frank Wessels](https://github.com/fwessels) kindly contributed ARM 64 bit assembly, 
+which gives a huge performance boost on this platform.
+
+## July 20, 2017
+
+`ReconstructData` added to [`Encoder`](https://godoc.org/github.com/klauspost/reedsolomon#Encoder) interface. 
+This can cause compatibility issues if you implement your own Encoder. A simple workaround can be added:
+
+```Go
+func (e *YourEnc) ReconstructData(shards [][]byte) error {
+	return ReconstructData(shards)
+}
+```
+
+You can of course also do your own implementation. 
+The [`StreamEncoder`](https://godoc.org/github.com/klauspost/reedsolomon#StreamEncoder) 
+handles this without modifying the interface. 
+This is a good lesson on why returning interfaces is not a good design.
+
+</details>
+
+# Usage
+
+This section assumes you know the basics of Reed-Solomon encoding. 
+A good start is this [Backblaze blog post](https://www.backblaze.com/blog/reed-solomon/).
+
+This package performs the calculation of the parity sets. The usage is therefore relatively simple.
+
+First of all, you need to choose your distribution of data and parity shards. 
+A 'good' distribution is very subjective, and will depend a lot on your usage scenario. 
+
+To create an encoder with 10 data shards (where your data goes) and 3 parity shards (calculated):
+```Go
+    enc, err := reedsolomon.New(10, 3)
+```
+This encoder will work for all parity sets with this distribution of data and parity shards. 
+
+If you will primarily be using it with one shard size it is recommended to use 
+[`WithAutoGoroutines(shardSize)`](https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc#WithAutoGoroutines)
+as an additional parameter. This will attempt to calculate the optimal number of goroutines to use for the best speed.
+It is not required that all shards are this size. 
+
+Then you send and receive data that is a simple slice of byte slices; `[][]byte`. 
+In the example above, the top slice must have a length of 13.
+
+```Go
+    data := make([][]byte, 13)
+```
+You should then fill the 10 first slices with *equally sized* data, 
+and create parity shards that will be populated with parity data. In this case we create the data in memory, 
+but you could for instance also use [mmap](https://github.com/edsrzf/mmap-go) to map files.
+
+```Go
+    // Create all shards, size them at 50000 each
+    for i := range input {
+      data[i] := make([]byte, 50000)
+    }
+    
+    // The above allocations can also be done by the encoder:
+    // data := enc.(reedsolomon.Extended).AllocAligned(50000)
+    
+    // Fill some data into the data shards
+    for i, in := range data[:10] {
+      for j:= range in {
+         in[j] = byte((i+j)&0xff)
+      }
+    }
+```
+
+To populate the parity shards, you simply call `Encode()` with your data.
+```Go
+    err = enc.Encode(data)
+```
+The only cases where you should get an error is, if the data shards aren't of equal size. 
+The last 3 shards now contain parity data. You can verify this by calling `Verify()`:
+
+```Go
+    ok, err = enc.Verify(data)
+```
+
+The final (and important) part is to be able to reconstruct missing shards. 
+For this to work, you need to know which parts of your data is missing. 
+The encoder *does not know which parts are invalid*, so if data corruption is a likely scenario, 
+you need to implement a hash check for each shard. 
+
+If a byte has changed in your set, and you don't know which it is, there is no way to reconstruct the data set.
+
+To indicate missing data, you set the shard to nil before calling `Reconstruct()`:
+
+```Go
+    // Delete two data shards
+    data[3] = nil
+    data[7] = nil
+    
+    // Reconstruct the missing shards
+    err := enc.Reconstruct(data)
+```
+The missing data and parity shards will be recreated. If more than 3 shards are missing, the reconstruction will fail.
+
+If you are only interested in the data shards (for reading purposes) you can call `ReconstructData()`:
+
+```Go
+    // Delete two data shards
+    data[3] = nil
+    data[7] = nil
+    
+    // Reconstruct just the missing data shards
+    err := enc.ReconstructData(data)
+```
+
+If you don't need all data shards you can use `ReconstructSome()`:
+
+```Go
+    // Delete two data shards
+    data[3] = nil
+    data[7] = nil
+    
+    // Reconstruct just the shard 3
+    err := enc.ReconstructSome(data, []bool{false, false, false, true, false, false, false, false})
+```
+
+So to sum up reconstruction:
+* The number of data/parity shards must match the numbers used for encoding.
+* The order of shards must be the same as used when encoding.
+* You may only supply data you know is valid.
+* Invalid shards should be set to nil.
+
+For complete examples of an encoder and decoder see the 
+[examples folder](https://github.com/klauspost/reedsolomon/tree/master/examples).
+
+# Splitting/Joining Data
+
+You might have a large slice of data. 
+To help you split this, there are some helper functions that can split and join a single byte slice.
+
+```Go
+   bigfile, _ := ioutil.Readfile("myfile.data")
+   
+   // Split the file
+   split, err := enc.Split(bigfile)
+```
+This will split the file into the number of data shards set when creating the encoder and create empty parity shards. 
+
+An important thing to note is that you have to *keep track of the exact input size*. 
+If the size of the input isn't divisible by the number of data shards, extra zeros will be inserted in the last shard.
+
+To join a data set, use the `Join()` function, which will join the shards and write it to the `io.Writer` you supply: 
+```Go
+   // Join a data set and write it to io.Discard.
+   err = enc.Join(io.Discard, data, len(bigfile))
+```
+
+## Aligned Allocations
+
+For AMD64 aligned inputs can make a big speed difference.
+
+This is an example of the speed difference when inputs are unaligned/aligned:
+
+```
+BenchmarkEncode100x20x10000-32    	    7058	    172648 ns/op	6950.57 MB/s
+BenchmarkEncode100x20x10000-32    	    8406	    137911 ns/op	8701.24 MB/s
+```
+
+This is mostly the case when dealing with odd-sized shards. 
+
+To facilitate this the package provides an `AllocAligned(shards, each int) [][]byte`. 
+This will allocate a number of shards, each with the size `each`.
+Each shard will then be aligned to a 64 byte boundary.
+
+Each encoder also has a `AllocAligned(each int) [][]byte` as an extended interface which will return the same, 
+but with the shard count configured in the encoder.   
+
+It is not possible to re-aligned already allocated slices, for example when using `Split`.
+When it is not possible to write to aligned shards, you should not copy to them.
+
+# Progressive encoding
+
+It is possible to encode individual shards using EncodeIdx:
+
+```Go
+	// EncodeIdx will add parity for a single data shard.
+	// Parity shards should start out as 0. The caller must zero them.
+	// Data shards must be delivered exactly once. There is no check for this.
+	// The parity shards will always be updated and the data shards will remain the same.
+	EncodeIdx(dataShard []byte, idx int, parity [][]byte) error
+```
+
+This allows progressively encoding the parity by sending individual data shards.
+There is no requirement on shards being delivered in order, 
+but when sent in order it allows encoding shards one at the time,
+effectively allowing the operation to be streaming. 
+
+The result will be the same as encoding all shards at once.
+There is a minor speed penalty using this method, so send 
+shards at once if they are available.
+
+## Example
+
+```Go
+func test() {
+    // Create an encoder with 7 data and 3 parity slices.
+    enc, _ := reedsolomon.New(7, 3)
+
+    // This will be our output parity.
+    parity := make([][]byte, 3)
+    for i := range parity {
+        parity[i] = make([]byte, 10000)
+    }
+
+    for i := 0; i < 7; i++ {
+        // Send data shards one at the time.
+        _ = enc.EncodeIdx(make([]byte, 10000), i, parity)
+    }
+
+    // parity now contains parity, as if all data was sent in one call.
+}
+```
+
+# Streaming/Merging
+
+It might seem like a limitation that all data should be in memory, 
+but an important property is that *as long as the number of data/parity shards are the same, 
+you can merge/split data sets*, and they will remain valid as a separate set.
+
+```Go
+    // Split the data set of 50000 elements into two of 25000
+    splitA := make([][]byte, 13)
+    splitB := make([][]byte, 13)
+    
+    // Merge into a 100000 element set
+    merged := make([][]byte, 13)
+    
+    for i := range data {
+      splitA[i] = data[i][:25000]
+      splitB[i] = data[i][25000:]
+      
+      // Concatenate it to itself
+	  merged[i] = append(make([]byte, 0, len(data[i])*2), data[i]...)
+	  merged[i] = append(merged[i], data[i]...)
+    }
+    
+    // Each part should still verify as ok.
+    ok, err := enc.Verify(splitA)
+    if ok && err == nil {
+        log.Println("splitA ok")
+    }
+    
+    ok, err = enc.Verify(splitB)
+    if ok && err == nil {
+        log.Println("splitB ok")
+    }
+    
+    ok, err = enc.Verify(merge)
+    if ok && err == nil {
+        log.Println("merge ok")
+    }
+```
+
+This means that if you have a data set that may not fit into memory, you can split processing into smaller blocks. 
+For the best throughput, don't use too small blocks.
+
+This also means that you can divide big input up into smaller blocks, and do reconstruction on parts of your data. 
+This doesn't give the same flexibility of a higher number of data shards, but it will be much more performant.
+
+# Streaming API
+
+There has been added support for a streaming API, to help perform fully streaming operations, 
+which enables you to do the same operations, but on streams. 
+To use the stream API, use [`NewStream`](https://godoc.org/github.com/klauspost/reedsolomon#NewStream) function 
+to create the encoding/decoding interfaces. 
+
+You can use [`WithConcurrentStreams`](https://godoc.org/github.com/klauspost/reedsolomon#WithConcurrentStreams) 
+to ready an interface that reads/writes concurrently from the streams.
+
+You can specify the size of each operation using 
+[`WithStreamBlockSize`](https://godoc.org/github.com/klauspost/reedsolomon#WithStreamBlockSize).
+This will set the size of each read/write operation.
+
+Input is delivered as `[]io.Reader`, output as `[]io.Writer`, and functionality corresponds to the in-memory API. 
+Each stream must supply the same amount of data, similar to how each slice must be similar size with the in-memory API. 
+If an error occurs in relation to a stream, 
+a [`StreamReadError`](https://godoc.org/github.com/klauspost/reedsolomon#StreamReadError) 
+or [`StreamWriteError`](https://godoc.org/github.com/klauspost/reedsolomon#StreamWriteError) 
+will help you determine which stream was the offender.
+
+There is no buffering or timeouts/retry specified. If you want to add that, you need to add it to the Reader/Writer.
+
+For complete examples of a streaming encoder and decoder see the 
+[examples folder](https://github.com/klauspost/reedsolomon/tree/master/examples).
+
+GF16 (more than 256 shards) is not supported by the streaming interface. 
+
+# Advanced Options
+
+You can modify internal options which affects how jobs are split between and processed by goroutines.
+
+To create options, use the WithXXX functions. You can supply options to `New`, `NewStream`. 
+If no Options are supplied, default options are used.
+
+Example of how to supply options:
+
+ ```Go
+     enc, err := reedsolomon.New(10, 3, WithMaxGoroutines(25))
+ ```
+
+# Leopard Compatible GF16
+
+When you encode more than 256 shards the library will switch to a [Leopard-RS](https://github.com/catid/leopard) implementation.
+
+This allows encoding up to 65536 shards (data+parity) with the following limitations, similar to leopard:
+
+* The original and recovery data must not exceed 65536 pieces.
+* The shard size *must*  each be a multiple of 64 bytes.
+* Each buffer should have the same number of bytes.
+* Even the last shard must be rounded up to the block size.
+
+|                 | Regular | Leopard |
+|-----------------|---------|---------|
+| Encode          | ✓       | ✓       |
+| EncodeIdx       | ✓       | -       |
+| Verify          | ✓       | ✓       |
+| Reconstruct     | ✓       | ✓       |
+| ReconstructData | ✓       | ✓       |
+| ReconstructSome | ✓       | ✓ (+)   |
+| Update          | ✓       | -       |
+| Split           | ✓       | ✓       |
+| Join            | ✓       | ✓       |
+
+* (+) Same as calling `ReconstructData`.
+
+The Split/Join functions will help to split an input to the proper sizes.
+
+Speed can be expected to be `O(N*log(N))`, compared to the `O(N*N)`. 
+Reconstruction matrix calculation is more time-consuming, 
+so be sure to include that as part of any benchmark you run.  
+
+For now SSSE3, AVX2 and AVX512 assembly are available on AMD64 platforms.
+
+Leopard mode currently always runs as a single goroutine, since multiple 
+goroutines doesn't provide any worthwhile speedup.
+
+## Leopard GF8
+
+It is possible to replace the default reed-solomon encoder with a leopard compatible one.
+This will typically be faster when dealing with more than 20-30 shards.
+Note that the limitations listed above also applies to this mode. 
+See table below for speed with different number of shards.
+
+To enable Leopard GF8 mode use `WithLeopardGF(true)`.
+
+Benchmark Encoding and Reconstructing *1KB* shards with variable number of shards.
+All implementation use inversion cache when available.
+Speed is total shard size for each operation. Data shard throughput is speed/2.
+AVX2 is used.
+
+| Encoder      | Shards      | Encode         | Recover All  | Recover One    |
+|--------------|-------------|----------------|--------------|----------------|
+| Cauchy       | 4+4         | 23076.83 MB/s  | 5444.02 MB/s | 10834.67 MB/s  |
+| Cauchy       | 8+8         | 15206.87 MB/s  | 4223.42 MB/s | 16181.62  MB/s |
+| Cauchy       | 16+16       | 7427.47 MB/s   | 3305.84 MB/s | 22480.41  MB/s |
+| Cauchy       | 32+32       | 3785.64 MB/s   | 2300.07 MB/s | 26181.31  MB/s |
+| Cauchy       | 64+64       | 1911.93 MB/s   | 1368.51 MB/s | 27992.93 MB/s  |
+| Cauchy       | 128+128     | 963.83 MB/s    | 1327.56 MB/s | 32866.86 MB/s  |
+| Leopard GF8  | 4+4         | 17061.28 MB/s  | 3099.06 MB/s | 4096.78 MB/s   |
+| Leopard GF8  | 8+8         | 10546.67 MB/s  | 2925.92 MB/s | 3964.00 MB/s   |
+| Leopard GF8  | 16+16       | 10961.37  MB/s | 2328.40 MB/s | 3110.22 MB/s   |
+| Leopard GF8  | 32+32       | 7111.47 MB/s   | 2374.61 MB/s | 3220.75 MB/s   |
+| Leopard GF8  | 64+64       | 7468.57 MB/s   | 2055.41 MB/s | 3061.81 MB/s   |
+| Leopard GF8  | 128+128     | 5479.99 MB/s   | 1953.21 MB/s | 2815.15 MB/s   |
+| Leopard GF16 | 256+256     | 6158.66 MB/s   | 454.14 MB/s  | 506.70 MB/s    |
+| Leopard GF16 | 512+512     | 4418.58 MB/s   | 685.75 MB/s  | 801.63 MB/s    |
+| Leopard GF16 | 1024+1024   | 4778.05 MB/s   | 814.51 MB/s  | 1080.19 MB/s   |
+| Leopard GF16 | 2048+2048   | 3417.05 MB/s   | 911.64 MB/s  | 1179.48 MB/s   |
+| Leopard GF16 | 4096+4096   | 3209.41 MB/s   | 729.13 MB/s  | 1135.06 MB/s   |
+| Leopard GF16 | 8192+8192   | 2034.11 MB/s   | 604.52 MB/s  | 842.13 MB/s    |
+| Leopard GF16 | 16384+16384 | 1525.88 MB/s   | 486.74 MB/s  | 750.01 MB/s    |
+| Leopard GF16 | 32768+32768 | 1138.67 MB/s   | 482.81 MB/s  | 712.73 MB/s    |
+
+"Traditional" encoding is faster until somewhere between 16 and 32 shards.
+Leopard provides fast encoding in all cases, but shows a significant overhead for reconstruction.
+
+Calculating the reconstruction matrix takes a significant amount of computation. 
+With bigger shards that will be smaller. Arguably, fewer shards typically also means bigger shards.
+Due to the high shard count caching reconstruction matrices generally isn't feasible for Leopard. 
+
+# Performance
+
+Performance depends mainly on the number of parity shards. 
+In rough terms, doubling the number of parity shards will double the encoding time.
+
+Here are the throughput numbers with some different selections of data and parity shards. 
+For reference each shard is 1MB random data, and 16 CPU cores are used for encoding.
+
+| Data | Parity | Go MB/s | SSSE3 MB/s | AVX2 MB/s |
+|------|--------|---------|------------|-----------|
+| 5    | 2      | 20,772  | 66,355     | 108,755   |
+| 8    | 8      | 6,815   | 38,338     | 70,516    |
+| 10   | 4      | 9,245   | 48,237     | 93,875    |
+| 50   | 20     | 2,063   | 12,130     | 22,828    |
+
+The throughput numbers here is the size of the encoded data and parity shards.
+
+If `runtime.GOMAXPROCS()` is set to a value higher than 1, 
+the encoder will use multiple goroutines to perform the calculations in `Verify`, `Encode` and `Reconstruct`.
+
+
+Benchmarking `Reconstruct()` followed by a `Verify()` (=`all`) versus just calling `ReconstructData()` (=`data`) gives the following result:
+```
+benchmark                            all MB/s     data MB/s    speedup
+BenchmarkReconstruct10x2x10000-8     2011.67      10530.10     5.23x
+BenchmarkReconstruct50x5x50000-8     4585.41      14301.60     3.12x
+BenchmarkReconstruct10x2x1M-8        8081.15      28216.41     3.49x
+BenchmarkReconstruct5x2x1M-8         5780.07      28015.37     4.85x
+BenchmarkReconstruct10x4x1M-8        4352.56      14367.61     3.30x
+BenchmarkReconstruct50x20x1M-8       1364.35      4189.79      3.07x
+BenchmarkReconstruct10x4x16M-8       1484.35      5779.53      3.89x
+```
+
+The package will use [GFNI](https://en.wikipedia.org/wiki/AVX-512#GFNI) instructions combined with AVX512 when these are available.
+This further improves speed by up to 3x over AVX2 code paths.
+
+## ARM64 NEON
+
+By exploiting NEON instructions the performance for ARM has been accelerated. 
+Below are the performance numbers for a single core on an EC2 m6g.16xlarge (Graviton2) instance (Amazon Linux 2):
+
+```
+BenchmarkGalois128K-64        119562     10028 ns/op        13070.78 MB/s
+BenchmarkGalois1M-64           14380     83424 ns/op        12569.22 MB/s
+BenchmarkGaloisXor128K-64      96508     12432 ns/op        10543.29 MB/s
+BenchmarkGaloisXor1M-64        10000    100322 ns/op        10452.13 MB/s
+```
+
+# Performance on ppc64le
+
+The performance for ppc64le has been accelerated. 
+This gives roughly a 10x performance improvement on this architecture as can be seen below:
+
+```
+benchmark                      old MB/s     new MB/s     speedup
+BenchmarkGalois128K-160        948.87       8878.85      9.36x
+BenchmarkGalois1M-160          968.85       9041.92      9.33x
+BenchmarkGaloisXor128K-160     862.02       7905.00      9.17x
+BenchmarkGaloisXor1M-160       784.60       6296.65      8.03x
+```
+
+# Legal
+
+> None of section below is legal advice. Seek your own legal counsel.
+> As stated by the [LICENSE](LICENSE) the authors will not be held reliable for any use of this library.
+> Users are encouraged to independently verify they comply with all legal requirements. 
+
+As can be seen in [recent news](https://www.datanami.com/2023/10/16/cloudera-hit-with-240-million-judgement-over-erasure-coding/)
+there has been lawsuits related to possible patents of aspects of erasure coding functionality.
+
+As a possible mitigation it is possible to use the tag `nopshufb` when compiling any code which includes this package.
+This will remove all inclusion and use of `PSHUFB` and equivalent on other platforms.
+
+This is done by adding `-tags=nopshufb` to `go build` and similar commands that produce binary output.
+
+The removed code may not be infringing and even after `-tags=nopshufb` there may still be infringing code left. 
+
+# Links
+* [Backblaze Open Sources Reed-Solomon Erasure Coding Source Code](https://www.backblaze.com/blog/reed-solomon/).
+* [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon). Compatible java library by Backblaze.
+* [ocaml-reed-solomon-erasure](https://gitlab.com/darrenldl/ocaml-reed-solomon-erasure). Compatible OCaml implementation.
+* [reedsolomon-c](https://github.com/jannson/reedsolomon-c). C version, compatible with output from this package.
+* [Reed-Solomon Erasure Coding in Haskell](https://github.com/NicolasT/reedsolomon). Haskell port of the package with similar performance.
+* [reed-solomon-erasure](https://github.com/darrenldl/reed-solomon-erasure). Compatible Rust implementation.
+* [go-erasure](https://github.com/somethingnew2-0/go-erasure). A similar library using cgo, slower in my tests.
+* [Screaming Fast Galois Field Arithmetic](http://www.snia.org/sites/default/files2/SDC2013/presentations/NewThinking/EthanMiller_Screaming_Fast_Galois_Field%20Arithmetic_SIMD%20Instructions.pdf). Basis for SSE3 optimizations.
+* [Leopard-RS](https://github.com/catid/leopard) C library used as basis for GF16 implementation.
+
+# License
+
+This code, as the original [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon) is published under an MIT license. See LICENSE file for more information.
--- a/vendor/github.com/klauspost/reedsolomon/galois.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois.go
--- a/vendor/github.com/klauspost/reedsolomon/galois_amd64.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_amd64.go
@ -0,0 +1,583 @@
+//go:build !noasm && !appengine && !gccgo && !nopshufb
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+
+package reedsolomon
+
+const pshufb = true
+
+//go:noescape
+func galMulSSSE3(low, high, in, out []byte)
+
+//go:noescape
+func galMulSSSE3Xor(low, high, in, out []byte)
+
+//go:noescape
+func galMulAVX2Xor(low, high, in, out []byte)
+
+//go:noescape
+func galMulAVX2(low, high, in, out []byte)
+
+//go:noescape
+func galMulAVX2Xor_64(low, high, in, out []byte)
+
+//go:noescape
+func galMulAVX2_64(low, high, in, out []byte)
+
+// This is what the assembler routines do in blocks of 16 bytes:
+/*
+func galMulSSSE3(low, high, in, out []byte) {
+	for n, input := range in {
+		l := input & 0xf
+		h := input >> 4
+		out[n] = low[l] ^ high[h]
+	}
+}
+
+func galMulSSSE3Xor(low, high, in, out []byte) {
+	for n, input := range in {
+		l := input & 0xf
+		h := input >> 4
+		out[n] ^= low[l] ^ high[h]
+	}
+}
+*/
+
+// bigSwitchover is the size where 64 bytes are processed per loop.
+const bigSwitchover = 128
+
+func galMulSlice(c byte, in, out []byte, o *options) {
+	if c == 1 {
+		copy(out, in)
+		return
+	}
+	if o.useAVX2 {
+		if len(in) >= bigSwitchover {
+			galMulAVX2_64(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+			done := (len(in) >> 6) << 6
+			in = in[done:]
+			out = out[done:]
+		}
+		if len(in) > 32 {
+			galMulAVX2(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+			done := (len(in) >> 5) << 5
+			in = in[done:]
+			out = out[done:]
+		}
+	} else if o.useSSSE3 {
+		galMulSSSE3(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+		done := (len(in) >> 4) << 4
+		in = in[done:]
+		out = out[done:]
+	}
+	out = out[:len(in)]
+	mt := mulTable[c][:256]
+	for i := range in {
+		out[i] = mt[in[i]]
+	}
+}
+
+func galMulSliceXor(c byte, in, out []byte, o *options) {
+	if c == 1 {
+		sliceXor(in, out, o)
+		return
+	}
+
+	if o.useAVX2 {
+		if len(in) >= bigSwitchover {
+			galMulAVX2Xor_64(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+			done := (len(in) >> 6) << 6
+			in = in[done:]
+			out = out[done:]
+		}
+		if len(in) >= 32 {
+			galMulAVX2Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+			done := (len(in) >> 5) << 5
+			in = in[done:]
+			out = out[done:]
+		}
+	} else if o.useSSSE3 {
+		galMulSSSE3Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+		done := (len(in) >> 4) << 4
+		in = in[done:]
+		out = out[done:]
+	}
+	if len(in) == 0 {
+		return
+	}
+	out = out[:len(in)]
+	mt := mulTable[c][:256]
+	for i := range in {
+		out[i] ^= mt[in[i]]
+	}
+}
+
+// simple slice xor
+func sliceXor(in, out []byte, o *options) {
+	if o.useSSE2 {
+		if len(in) >= bigSwitchover {
+			if o.useAVX2 {
+				avx2XorSlice_64(in, out)
+				done := (len(in) >> 6) << 6
+				in = in[done:]
+				out = out[done:]
+			} else {
+				sSE2XorSlice_64(in, out)
+				done := (len(in) >> 6) << 6
+				in = in[done:]
+				out = out[done:]
+			}
+		}
+		if len(in) >= 16 {
+			sSE2XorSlice(in, out)
+			done := (len(in) >> 4) << 4
+			in = in[done:]
+			out = out[done:]
+		}
+	} else {
+		sliceXorGo(in, out, o)
+		return
+	}
+	out = out[:len(in)]
+	for i := range in {
+		out[i] ^= in[i]
+	}
+}
+
+// 4-way butterfly
+func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
+	if len(work[0]) == 0 {
+		return
+	}
+
+	t01 := &multiply256LUT[log_m01]
+	t23 := &multiply256LUT[log_m23]
+	t02 := &multiply256LUT[log_m02]
+	if o.useAVX512 {
+		if log_m01 == modulus {
+			if log_m23 == modulus {
+				if log_m02 == modulus {
+					ifftDIT4_avx512_7(work, dist*24, t01, t23, t02)
+				} else {
+					ifftDIT4_avx512_3(work, dist*24, t01, t23, t02)
+				}
+			} else {
+				if log_m02 == modulus {
+					ifftDIT4_avx512_5(work, dist*24, t01, t23, t02)
+				} else {
+					ifftDIT4_avx512_1(work, dist*24, t01, t23, t02)
+				}
+			}
+		} else {
+			if log_m23 == modulus {
+				if log_m02 == modulus {
+					ifftDIT4_avx512_6(work, dist*24, t01, t23, t02)
+				} else {
+					ifftDIT4_avx512_2(work, dist*24, t01, t23, t02)
+				}
+			} else {
+				if log_m02 == modulus {
+					ifftDIT4_avx512_4(work, dist*24, t01, t23, t02)
+				} else {
+					ifftDIT4_avx512_0(work, dist*24, t01, t23, t02)
+				}
+			}
+		}
+		return
+	} else if o.useAVX2 {
+		if log_m01 == modulus {
+			if log_m23 == modulus {
+				if log_m02 == modulus {
+					ifftDIT4_avx2_7(work, dist*24, t01, t23, t02)
+				} else {
+					ifftDIT4_avx2_3(work, dist*24, t01, t23, t02)
+				}
+			} else {
+				if log_m02 == modulus {
+					ifftDIT4_avx2_5(work, dist*24, t01, t23, t02)
+				} else {
+					ifftDIT4_avx2_1(work, dist*24, t01, t23, t02)
+				}
+			}
+		} else {
+			if log_m23 == modulus {
+				if log_m02 == modulus {
+					ifftDIT4_avx2_6(work, dist*24, t01, t23, t02)
+				} else {
+					ifftDIT4_avx2_2(work, dist*24, t01, t23, t02)
+				}
+			} else {
+				if log_m02 == modulus {
+					ifftDIT4_avx2_4(work, dist*24, t01, t23, t02)
+				} else {
+					ifftDIT4_avx2_0(work, dist*24, t01, t23, t02)
+				}
+			}
+		}
+		return
+	}
+	ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 4-way butterfly
+func ifftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
+	if len(work[0]) == 0 {
+		return
+	}
+
+	if false && o.useAvx512GFNI {
+		// Note that these currently require that length is multiple of 64.
+		t01 := gf2p811dMulMatrices[log_m01]
+		t23 := gf2p811dMulMatrices[log_m23]
+		t02 := gf2p811dMulMatrices[log_m02]
+		if log_m01 == modulus8 {
+			if log_m23 == modulus8 {
+				if log_m02 == modulus8 {
+					ifftDIT48_gfni_7(work, dist*24, t01, t23, t02)
+				} else {
+					ifftDIT48_gfni_3(work, dist*24, t01, t23, t02)
+				}
+			} else {
+				if log_m02 == modulus8 {
+					ifftDIT48_gfni_5(work, dist*24, t01, t23, t02)
+				} else {
+					ifftDIT48_gfni_1(work, dist*24, t01, t23, t02)
+				}
+			}
+		} else {
+			if log_m23 == modulus8 {
+				if log_m02 == modulus8 {
+					ifftDIT48_gfni_6(work, dist*24, t01, t23, t02)
+				} else {
+					ifftDIT48_gfni_2(work, dist*24, t01, t23, t02)
+				}
+			} else {
+				if log_m02 == modulus8 {
+					ifftDIT48_gfni_4(work, dist*24, t01, t23, t02)
+				} else {
+					ifftDIT48_gfni_0(work, dist*24, t01, t23, t02)
+				}
+			}
+		}
+		return
+	}
+	if o.useAVX2 {
+		// Note that these currently require that length is multiple of 64.
+		t01 := &multiply256LUT8[log_m01]
+		t23 := &multiply256LUT8[log_m23]
+		t02 := &multiply256LUT8[log_m02]
+		if log_m01 == modulus8 {
+			if log_m23 == modulus8 {
+				if log_m02 == modulus8 {
+					ifftDIT48_avx2_7(work, dist*24, t01, t23, t02)
+				} else {
+					ifftDIT48_avx2_3(work, dist*24, t01, t23, t02)
+				}
+			} else {
+				if log_m02 == modulus8 {
+					ifftDIT48_avx2_5(work, dist*24, t01, t23, t02)
+				} else {
+					ifftDIT48_avx2_1(work, dist*24, t01, t23, t02)
+				}
+			}
+		} else {
+			if log_m23 == modulus8 {
+				if log_m02 == modulus8 {
+					ifftDIT48_avx2_6(work, dist*24, t01, t23, t02)
+				} else {
+					ifftDIT48_avx2_2(work, dist*24, t01, t23, t02)
+				}
+			} else {
+				if log_m02 == modulus8 {
+					ifftDIT48_avx2_4(work, dist*24, t01, t23, t02)
+				} else {
+					ifftDIT48_avx2_0(work, dist*24, t01, t23, t02)
+				}
+			}
+		}
+		return
+	}
+	ifftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
+	if len(work[0]) == 0 {
+		return
+	}
+
+	t01 := &multiply256LUT[log_m01]
+	t23 := &multiply256LUT[log_m23]
+	t02 := &multiply256LUT[log_m02]
+	if o.useAVX512 {
+		if log_m02 == modulus {
+			if log_m01 == modulus {
+				if log_m23 == modulus {
+					fftDIT4_avx512_7(work, dist*24, t01, t23, t02)
+				} else {
+					fftDIT4_avx512_3(work, dist*24, t01, t23, t02)
+				}
+			} else {
+				if log_m23 == modulus {
+					fftDIT4_avx512_5(work, dist*24, t01, t23, t02)
+				} else {
+					fftDIT4_avx512_1(work, dist*24, t01, t23, t02)
+				}
+			}
+		} else {
+			if log_m01 == modulus {
+				if log_m23 == modulus {
+					fftDIT4_avx512_6(work, dist*24, t01, t23, t02)
+				} else {
+					fftDIT4_avx512_2(work, dist*24, t01, t23, t02)
+				}
+			} else {
+				if log_m23 == modulus {
+					fftDIT4_avx512_4(work, dist*24, t01, t23, t02)
+				} else {
+					fftDIT4_avx512_0(work, dist*24, t01, t23, t02)
+				}
+			}
+		}
+		return
+	} else if o.useAVX2 {
+		if log_m02 == modulus {
+			if log_m01 == modulus {
+				if log_m23 == modulus {
+					fftDIT4_avx2_7(work, dist*24, t01, t23, t02)
+				} else {
+					fftDIT4_avx2_3(work, dist*24, t01, t23, t02)
+				}
+			} else {
+				if log_m23 == modulus {
+					fftDIT4_avx2_5(work, dist*24, t01, t23, t02)
+				} else {
+					fftDIT4_avx2_1(work, dist*24, t01, t23, t02)
+				}
+			}
+		} else {
+			if log_m01 == modulus {
+				if log_m23 == modulus {
+					fftDIT4_avx2_6(work, dist*24, t01, t23, t02)
+				} else {
+					fftDIT4_avx2_2(work, dist*24, t01, t23, t02)
+				}
+			} else {
+				if log_m23 == modulus {
+					fftDIT4_avx2_4(work, dist*24, t01, t23, t02)
+				} else {
+					fftDIT4_avx2_0(work, dist*24, t01, t23, t02)
+				}
+			}
+		}
+		return
+	}
+	fftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 4-way butterfly
+func fftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
+	if len(work[0]) == 0 {
+		return
+	}
+
+	if false && o.useAvx512GFNI {
+		t01 := gf2p811dMulMatrices[log_m01]
+		t23 := gf2p811dMulMatrices[log_m23]
+		t02 := gf2p811dMulMatrices[log_m02]
+		// Note that these currently require that length is multiple of 64.
+		if log_m02 == modulus8 {
+			if log_m01 == modulus8 {
+				if log_m23 == modulus8 {
+					fftDIT48_gfni_7(work, dist*24, t01, t23, t02)
+				} else {
+					fftDIT48_gfni_3(work, dist*24, t01, t23, t02)
+				}
+			} else {
+				if log_m23 == modulus8 {
+					fftDIT48_gfni_5(work, dist*24, t01, t23, t02)
+				} else {
+					fftDIT48_gfni_1(work, dist*24, t01, t23, t02)
+				}
+			}
+		} else {
+			if log_m01 == modulus8 {
+				if log_m23 == modulus8 {
+					fftDIT48_gfni_6(work, dist*24, t01, t23, t02)
+				} else {
+					fftDIT48_gfni_2(work, dist*24, t01, t23, t02)
+				}
+			} else {
+				if log_m23 == modulus8 {
+					fftDIT48_gfni_4(work, dist*24, t01, t23, t02)
+				} else {
+					fftDIT48_gfni_0(work, dist*24, t01, t23, t02)
+				}
+			}
+		}
+		return
+	}
+	if o.useAVX2 {
+		t01 := &multiply256LUT8[log_m01]
+		t23 := &multiply256LUT8[log_m23]
+		t02 := &multiply256LUT8[log_m02]
+		// Note that these currently require that length is multiple of 64.
+		if log_m02 == modulus8 {
+			if log_m01 == modulus8 {
+				if log_m23 == modulus8 {
+					fftDIT48_avx2_7(work, dist*24, t01, t23, t02)
+				} else {
+					fftDIT48_avx2_3(work, dist*24, t01, t23, t02)
+				}
+			} else {
+				if log_m23 == modulus8 {
+					fftDIT48_avx2_5(work, dist*24, t01, t23, t02)
+				} else {
+					fftDIT48_avx2_1(work, dist*24, t01, t23, t02)
+				}
+			}
+		} else {
+			if log_m01 == modulus8 {
+				if log_m23 == modulus8 {
+					fftDIT48_avx2_6(work, dist*24, t01, t23, t02)
+				} else {
+					fftDIT48_avx2_2(work, dist*24, t01, t23, t02)
+				}
+			} else {
+				if log_m23 == modulus8 {
+					fftDIT48_avx2_4(work, dist*24, t01, t23, t02)
+				} else {
+					fftDIT48_avx2_0(work, dist*24, t01, t23, t02)
+				}
+			}
+		}
+		return
+	}
+	fftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 2-way butterfly forward
+func fftDIT2(x, y []byte, log_m ffe, o *options) {
+	if len(x) == 0 {
+		return
+	}
+	if o.useAVX2 {
+		tmp := &multiply256LUT[log_m]
+		fftDIT2_avx2(x, y, tmp)
+	} else if o.useSSSE3 {
+		tmp := &multiply256LUT[log_m]
+		fftDIT2_ssse3(x, y, tmp)
+	} else {
+		// Reference version:
+		refMulAdd(x, y, log_m)
+		sliceXor(x, y, o)
+	}
+}
+
+// 2-way butterfly forward
+func fftDIT28(x, y []byte, log_m ffe8, o *options) {
+	if len(x) == 0 {
+		return
+	}
+
+	if o.useAVX2 {
+		fftDIT28_avx2(x, y, &multiply256LUT8[log_m])
+		if len(x)&63 == 0 {
+			return
+		}
+		done := (len(y) >> 6) << 6
+		y = y[done:]
+		x = x[done:]
+	}
+	mulAdd8(x, y, log_m, o)
+	sliceXor(x, y, o)
+}
+
+// 2-way butterfly inverse
+func ifftDIT28(x, y []byte, log_m ffe8, o *options) {
+	if len(x) == 0 {
+		return
+	}
+
+	if o.useAVX2 {
+		ifftDIT28_avx2(x, y, &multiply256LUT8[log_m])
+		if len(x)&63 == 0 {
+			return
+		}
+		done := (len(y) >> 6) << 6
+		y = y[done:]
+		x = x[done:]
+	}
+	sliceXor(x, y, o)
+	mulAdd8(x, y, log_m, o)
+}
+
+func mulAdd8(x, y []byte, log_m ffe8, o *options) {
+	if o.useAVX2 {
+		t := &multiply256LUT8[log_m]
+		galMulAVX2Xor_64(t[:16], t[16:32], y, x)
+		done := (len(y) >> 6) << 6
+		y = y[done:]
+		x = x[done:]
+	} else if o.useSSSE3 {
+		t := &multiply256LUT8[log_m]
+		galMulSSSE3Xor(t[:16], t[16:32], y, x)
+		done := (len(y) >> 4) << 4
+		y = y[done:]
+		x = x[done:]
+	}
+	refMulAdd8(x, y, log_m)
+}
+
+// 2-way butterfly
+func ifftDIT2(x, y []byte, log_m ffe, o *options) {
+	if len(x) == 0 {
+		return
+	}
+	if o.useAVX2 {
+		tmp := &multiply256LUT[log_m]
+		ifftDIT2_avx2(x, y, tmp)
+	} else if o.useSSSE3 {
+		tmp := &multiply256LUT[log_m]
+		ifftDIT2_ssse3(x, y, tmp)
+	} else {
+		// Reference version:
+		sliceXor(x, y, o)
+		refMulAdd(x, y, log_m)
+	}
+}
+
+func mulgf16(x, y []byte, log_m ffe, o *options) {
+	if len(x) == 0 {
+		return
+	}
+	if o.useAVX2 {
+		tmp := &multiply256LUT[log_m]
+		mulgf16_avx2(x, y, tmp)
+	} else if o.useSSSE3 {
+		tmp := &multiply256LUT[log_m]
+		mulgf16_ssse3(x, y, tmp)
+	} else {
+		refMul(x, y, log_m)
+	}
+}
+
+func mulgf8(out, in []byte, log_m ffe8, o *options) {
+	if o.useAVX2 {
+		t := &multiply256LUT8[log_m]
+		galMulAVX2_64(t[:16], t[16:32], in, out)
+		done := (len(in) >> 6) << 6
+		in = in[done:]
+		out = out[done:]
+	} else if o.useSSSE3 {
+		t := &multiply256LUT8[log_m]
+		galMulSSSE3(t[:16], t[16:32], in, out)
+		done := (len(in) >> 4) << 4
+		in = in[done:]
+		out = out[done:]
+	}
+	out = out[:len(in)]
+	mt := mul8LUTs[log_m].Value[:]
+	for i := range in {
+		out[i] = byte(mt[in[i]])
+	}
+}
--- a/vendor/github.com/klauspost/reedsolomon/galois_amd64.s
+++ b/vendor/github.com/klauspost/reedsolomon/galois_amd64.s
@ -0,0 +1,310 @@
+//+build !noasm
+//+build !appengine
+//+build !gccgo
+//+build !nopshufb
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+
+// Based on http://www.snia.org/sites/default/files2/SDC2013/presentations/NewThinking/EthanMiller_Screaming_Fast_Galois_Field%20Arithmetic_SIMD%20Instructions.pdf
+// and http://jerasure.org/jerasure/gf-complete/tree/master
+
+// func galMulSSSE3Xor(low, high, in, out []byte)
+TEXT ·galMulSSSE3Xor(SB), 7, $0
+	MOVQ   low+0(FP), SI     // SI: &low
+	MOVQ   high+24(FP), DX   // DX: &high
+	MOVOU  (SI), X6          // X6 low
+	MOVOU  (DX), X7          // X7: high
+	MOVQ   $15, BX           // BX: low mask
+	MOVQ   BX, X8
+	PXOR   X5, X5
+	MOVQ   in+48(FP), SI     // R11: &in
+	MOVQ   in_len+56(FP), R9 // R9: len(in)
+	MOVQ   out+72(FP), DX    // DX: &out
+	PSHUFB X5, X8            // X8: lomask (unpacked)
+	SHRQ   $4, R9            // len(in) / 16
+	MOVQ   SI, AX
+	MOVQ   DX, BX
+	ANDQ   $15, AX
+	ANDQ   $15, BX
+	CMPQ   R9, $0
+	JEQ    done_xor
+	ORQ    AX, BX
+	CMPQ   BX, $0
+	JNZ    loopback_xor
+
+loopback_xor_aligned:
+	MOVOA  (SI), X0             // in[x]
+	MOVOA  (DX), X4             // out[x]
+	MOVOA  X0, X1               // in[x]
+	MOVOA  X6, X2               // low copy
+	MOVOA  X7, X3               // high copy
+	PSRLQ  $4, X1               // X1: high input
+	PAND   X8, X0               // X0: low input
+	PAND   X8, X1               // X0: high input
+	PSHUFB X0, X2               // X2: mul low part
+	PSHUFB X1, X3               // X3: mul high part
+	PXOR   X2, X3               // X3: Result
+	PXOR   X4, X3               // X3: Result xor existing out
+	MOVOA  X3, (DX)             // Store
+	ADDQ   $16, SI              // in+=16
+	ADDQ   $16, DX              // out+=16
+	SUBQ   $1, R9
+	JNZ    loopback_xor_aligned
+	JMP    done_xor
+
+loopback_xor:
+	MOVOU  (SI), X0     // in[x]
+	MOVOU  (DX), X4     // out[x]
+	MOVOU  X0, X1       // in[x]
+	MOVOU  X6, X2       // low copy
+	MOVOU  X7, X3       // high copy
+	PSRLQ  $4, X1       // X1: high input
+	PAND   X8, X0       // X0: low input
+	PAND   X8, X1       // X0: high input
+	PSHUFB X0, X2       // X2: mul low part
+	PSHUFB X1, X3       // X3: mul high part
+	PXOR   X2, X3       // X3: Result
+	PXOR   X4, X3       // X3: Result xor existing out
+	MOVOU  X3, (DX)     // Store
+	ADDQ   $16, SI      // in+=16
+	ADDQ   $16, DX      // out+=16
+	SUBQ   $1, R9
+	JNZ    loopback_xor
+
+done_xor:
+	RET
+
+// func galMulSSSE3(low, high, in, out []byte)
+TEXT ·galMulSSSE3(SB), 7, $0
+	MOVQ   low+0(FP), SI     // SI: &low
+	MOVQ   high+24(FP), DX   // DX: &high
+	MOVOU  (SI), X6          // X6 low
+	MOVOU  (DX), X7          // X7: high
+	MOVQ   $15, BX           // BX: low mask
+	MOVQ   BX, X8
+	PXOR   X5, X5
+	MOVQ   in+48(FP), SI     // R11: &in
+	MOVQ   in_len+56(FP), R9 // R9: len(in)
+	MOVQ   out+72(FP), DX    // DX: &out
+	PSHUFB X5, X8            // X8: lomask (unpacked)
+	MOVQ   SI, AX
+	MOVQ   DX, BX
+	SHRQ   $4, R9            // len(in) / 16
+	ANDQ   $15, AX
+	ANDQ   $15, BX
+	CMPQ   R9, $0
+	JEQ    done
+	ORQ    AX, BX
+	CMPQ   BX, $0
+	JNZ    loopback
+
+loopback_aligned:
+	MOVOA  (SI), X0         // in[x]
+	MOVOA  X0, X1           // in[x]
+	MOVOA  X6, X2           // low copy
+	MOVOA  X7, X3           // high copy
+	PSRLQ  $4, X1           // X1: high input
+	PAND   X8, X0           // X0: low input
+	PAND   X8, X1           // X0: high input
+	PSHUFB X0, X2           // X2: mul low part
+	PSHUFB X1, X3           // X3: mul high part
+	PXOR   X2, X3           // X3: Result
+	MOVOA  X3, (DX)         // Store
+	ADDQ   $16, SI          // in+=16
+	ADDQ   $16, DX          // out+=16
+	SUBQ   $1, R9
+	JNZ    loopback_aligned
+	JMP    done
+
+loopback:
+	MOVOU  (SI), X0 // in[x]
+	MOVOU  X0, X1   // in[x]
+	MOVOA  X6, X2   // low copy
+	MOVOA  X7, X3   // high copy
+	PSRLQ  $4, X1   // X1: high input
+	PAND   X8, X0   // X0: low input
+	PAND   X8, X1   // X0: high input
+	PSHUFB X0, X2   // X2: mul low part
+	PSHUFB X1, X3   // X3: mul high part
+	PXOR   X2, X3   // X3: Result
+	MOVOU  X3, (DX) // Store
+	ADDQ   $16, SI  // in+=16
+	ADDQ   $16, DX  // out+=16
+	SUBQ   $1, R9
+	JNZ    loopback
+
+done:
+	RET
+
+// func galMulAVX2Xor(low, high, in, out []byte)
+TEXT ·galMulAVX2Xor(SB), 7, $0
+	MOVQ  low+0(FP), SI     // SI: &low
+	MOVQ  high+24(FP), DX   // DX: &high
+	MOVQ  $15, BX           // BX: low mask
+	MOVQ  BX, X5
+	MOVOU (SI), X6          // X6: low
+	MOVOU (DX), X7          // X7: high
+	MOVQ  in_len+56(FP), R9 // R9: len(in)
+
+	VINSERTI128  $1, X6, Y6, Y6 // low
+	VINSERTI128  $1, X7, Y7, Y7 // high
+	VPBROADCASTB X5, Y8         // Y8: lomask (unpacked)
+
+	SHRQ  $5, R9         // len(in) / 32
+	MOVQ  out+72(FP), DX // DX: &out
+	MOVQ  in+48(FP), SI  // SI: &in
+	TESTQ R9, R9
+	JZ    done_xor_avx2
+
+loopback_xor_avx2:
+	VMOVDQU (SI), Y0
+	VMOVDQU (DX), Y4
+	VPSRLQ  $4, Y0, Y1 // Y1: high input
+	VPAND   Y8, Y0, Y0 // Y0: low input
+	VPAND   Y8, Y1, Y1 // Y1: high input
+	VPSHUFB Y0, Y6, Y2 // Y2: mul low part
+	VPSHUFB Y1, Y7, Y3 // Y3: mul high part
+	VPXOR   Y3, Y2, Y3 // Y3: Result
+	VPXOR   Y4, Y3, Y4 // Y4: Result
+	VMOVDQU Y4, (DX)
+
+	ADDQ $32, SI           // in+=32
+	ADDQ $32, DX           // out+=32
+	SUBQ $1, R9
+	JNZ  loopback_xor_avx2
+
+done_xor_avx2:
+	VZEROUPPER
+	RET
+
+// func galMulAVX2(low, high, in, out []byte)
+TEXT ·galMulAVX2(SB), 7, $0
+	MOVQ  low+0(FP), SI     // SI: &low
+	MOVQ  high+24(FP), DX   // DX: &high
+	MOVQ  $15, BX           // BX: low mask
+	MOVQ  BX, X5
+	MOVOU (SI), X6          // X6: low
+	MOVOU (DX), X7          // X7: high
+	MOVQ  in_len+56(FP), R9 // R9: len(in)
+
+	VINSERTI128  $1, X6, Y6, Y6 // low
+	VINSERTI128  $1, X7, Y7, Y7 // high
+	VPBROADCASTB X5, Y8         // Y8: lomask (unpacked)
+
+	SHRQ  $5, R9         // len(in) / 32
+	MOVQ  out+72(FP), DX // DX: &out
+	MOVQ  in+48(FP), SI  // SI: &in
+	TESTQ R9, R9
+	JZ    done_avx2
+
+loopback_avx2:
+	VMOVDQU (SI), Y0
+	VPSRLQ  $4, Y0, Y1 // Y1: high input
+	VPAND   Y8, Y0, Y0 // Y0: low input
+	VPAND   Y8, Y1, Y1 // Y1: high input
+	VPSHUFB Y0, Y6, Y2 // Y2: mul low part
+	VPSHUFB Y1, Y7, Y3 // Y3: mul high part
+	VPXOR   Y3, Y2, Y4 // Y4: Result
+	VMOVDQU Y4, (DX)
+
+	ADDQ $32, SI       // in+=32
+	ADDQ $32, DX       // out+=32
+	SUBQ $1, R9
+	JNZ  loopback_avx2
+
+done_avx2:
+	VZEROUPPER
+	RET
+
+// func galMulAVX2Xor_64(low, high, in, out []byte)
+TEXT ·galMulAVX2Xor_64(SB), 7, $0
+	MOVQ low+0(FP), SI     // SI: &low
+	MOVQ high+24(FP), DX   // DX: &high
+	MOVQ $15, BX           // BX: low mask
+	MOVQ BX, X5
+	MOVQ in_len+56(FP), R9 // R9: len(in)
+
+	VBROADCASTI128 (SI), Y6 // low table
+	VBROADCASTI128 (DX), Y7 // high high table
+	VPBROADCASTB   X5, Y8   // Y8: lomask (unpacked)
+
+	SHRQ  $6, R9           // len(in) / 64
+	MOVQ  out+72(FP), DX   // DX: &out
+	MOVQ  in+48(FP), SI    // SI: &in
+	TESTQ R9, R9
+	JZ    done_xor_avx2_64
+
+loopback_xor_avx2_64:
+	VMOVDQU (SI), Y0
+	VMOVDQU 32(SI), Y10
+	VMOVDQU (DX), Y4
+	VMOVDQU 32(DX), Y14
+	VPSRLQ  $4, Y0, Y1    // Y1: high input
+	VPSRLQ  $4, Y10, Y11  // Y11: high input 2
+	VPAND   Y8, Y0, Y0    // Y0: low input
+	VPAND   Y8, Y10, Y10  // Y10: low input 2
+	VPAND   Y8, Y1, Y1    // Y11: high input
+	VPAND   Y8, Y11, Y11  // Y11: high input 2
+	VPSHUFB Y0, Y6, Y2    // Y2: mul low part
+	VPSHUFB Y10, Y6, Y12  // Y12: mul low part 2
+	VPSHUFB Y1, Y7, Y3    // Y3: mul high part
+	VPSHUFB Y11, Y7, Y13  // Y13: mul high part 2
+	VPXOR   Y3, Y2, Y3    // Y3: Result
+	VPXOR   Y13, Y12, Y13 // Y13: Result 2
+	VPXOR   Y4, Y3, Y4    // Y4: Result
+	VPXOR   Y14, Y13, Y14 // Y4: Result 2
+	VMOVDQU Y4, (DX)
+	VMOVDQU Y14, 32(DX)
+
+	ADDQ $64, SI              // in+=64
+	ADDQ $64, DX              // out+=64
+	SUBQ $1, R9
+	JNZ  loopback_xor_avx2_64
+
+done_xor_avx2_64:
+	VZEROUPPER
+	RET
+
+// func galMulAVX2_64(low, high, in, out []byte)
+TEXT ·galMulAVX2_64(SB), 7, $0
+	MOVQ           low+0(FP), SI     // SI: &low
+	MOVQ           high+24(FP), DX   // DX: &high
+	MOVQ           $15, BX           // BX: low mask
+	MOVQ           BX, X5
+	MOVQ           in_len+56(FP), R9 // R9: len(in)
+	VBROADCASTI128 (SI), Y6          // low table
+	VBROADCASTI128 (DX), Y7          // high high table
+	VPBROADCASTB   X5, Y8            // Y8: lomask (unpacked)
+
+	SHRQ  $6, R9         // len(in) / 64
+	MOVQ  out+72(FP), DX // DX: &out
+	MOVQ  in+48(FP), SI  // SI: &in
+	TESTQ R9, R9
+	JZ    done_avx2_64
+
+loopback_avx2_64:
+	VMOVDQU (SI), Y0
+	VMOVDQU 32(SI), Y10
+	VPSRLQ  $4, Y0, Y1    // Y1: high input
+	VPSRLQ  $4, Y10, Y11  // Y11: high input 2
+	VPAND   Y8, Y0, Y0    // Y0: low input
+	VPAND   Y8, Y10, Y10  // Y10: low input
+	VPAND   Y8, Y1, Y1    // Y1: high input
+	VPAND   Y8, Y11, Y11  // Y11: high input 2
+	VPSHUFB Y0, Y6, Y2    // Y2: mul low part
+	VPSHUFB Y10, Y6, Y12  // Y12: mul low part 2
+	VPSHUFB Y1, Y7, Y3    // Y3: mul high part
+	VPSHUFB Y11, Y7, Y13  // Y13: mul high part 2
+	VPXOR   Y3, Y2, Y4    // Y4: Result
+	VPXOR   Y13, Y12, Y14 // Y14: Result 2
+	VMOVDQU Y4, (DX)
+	VMOVDQU Y14, 32(DX)
+
+	ADDQ $64, SI          // in+=64
+	ADDQ $64, DX          // out+=64
+	SUBQ $1, R9
+	JNZ  loopback_avx2_64
+
+done_avx2_64:
+	VZEROUPPER
+	RET
--- a/vendor/github.com/klauspost/reedsolomon/galois_arm64.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_arm64.go
@ -0,0 +1,130 @@
+//go:build !noasm && !appengine && !gccgo && !nopshufb
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+// Copyright 2017, Minio, Inc.
+
+package reedsolomon
+
+const pshufb = true
+
+//go:noescape
+func galMulNEON(low, high, in, out []byte)
+
+//go:noescape
+func galMulXorNEON(low, high, in, out []byte)
+
+func galMulSlice(c byte, in, out []byte, o *options) {
+	if c == 1 {
+		copy(out, in)
+		return
+	}
+	var done int
+	galMulNEON(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+	done = (len(in) >> 5) << 5
+
+	remain := len(in) - done
+	if remain > 0 {
+		mt := mulTable[c][:256]
+		for i := done; i < len(in); i++ {
+			out[i] = mt[in[i]]
+		}
+	}
+}
+
+func galMulSliceXor(c byte, in, out []byte, o *options) {
+	if c == 1 {
+		sliceXor(in, out, o)
+		return
+	}
+	var done int
+	galMulXorNEON(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+	done = (len(in) >> 5) << 5
+
+	remain := len(in) - done
+	if remain > 0 {
+		mt := mulTable[c][:256]
+		for i := done; i < len(in); i++ {
+			out[i] ^= mt[in[i]]
+		}
+	}
+}
+
+// 4-way butterfly
+func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
+	ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 4-way butterfly
+func ifftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
+	ifftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 4-way butterfly
+func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
+	fftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 4-way butterfly
+func fftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
+	fftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 2-way butterfly forward
+func fftDIT2(x, y []byte, log_m ffe, o *options) {
+	// Reference version:
+	refMulAdd(x, y, log_m)
+	// 64 byte aligned, always full.
+	xorSliceNEON(x, y)
+}
+
+// 2-way butterfly forward
+func fftDIT28(x, y []byte, log_m ffe8, o *options) {
+	// Reference version:
+	mulAdd8(x, y, log_m, o)
+	sliceXor(x, y, o)
+}
+
+// 2-way butterfly
+func ifftDIT2(x, y []byte, log_m ffe, o *options) {
+	// 64 byte aligned, always full.
+	xorSliceNEON(x, y)
+	// Reference version:
+	refMulAdd(x, y, log_m)
+}
+
+// 2-way butterfly inverse
+func ifftDIT28(x, y []byte, log_m ffe8, o *options) {
+	// Reference version:
+	sliceXor(x, y, o)
+	mulAdd8(x, y, log_m, o)
+}
+
+func mulgf16(x, y []byte, log_m ffe, o *options) {
+	refMul(x, y, log_m)
+}
+
+func mulAdd8(out, in []byte, log_m ffe8, o *options) {
+	t := &multiply256LUT8[log_m]
+	galMulXorNEON(t[:16], t[16:32], in, out)
+	done := (len(in) >> 5) << 5
+	in = in[done:]
+	if len(in) > 0 {
+		out = out[done:]
+		refMulAdd8(in, out, log_m)
+	}
+}
+
+func mulgf8(out, in []byte, log_m ffe8, o *options) {
+	var done int
+	t := &multiply256LUT8[log_m]
+	galMulNEON(t[:16], t[16:32], in, out)
+	done = (len(in) >> 5) << 5
+
+	remain := len(in) - done
+	if remain > 0 {
+		mt := mul8LUTs[log_m].Value[:]
+		for i := done; i < len(in); i++ {
+			out[i] ^= byte(mt[in[i]])
+		}
+	}
+}
--- a/vendor/github.com/klauspost/reedsolomon/galois_arm64.s
+++ b/vendor/github.com/klauspost/reedsolomon/galois_arm64.s
@ -0,0 +1,102 @@
+//+build !noasm
+//+build !appengine
+//+build !gccgo
+//+build !nopshufb
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+// Copyright 2017, Minio, Inc.
+
+#define LOAD(LO1, LO2, HI1, HI2) \
+	VLD1.P 32(R1), [LO1.B16, LO2.B16] \
+	                                  \
+	\ // Get low input and high input
+	VUSHR  $4, LO1.B16, HI1.B16       \
+	VUSHR  $4, LO2.B16, HI2.B16       \
+	VAND   V8.B16, LO1.B16, LO1.B16   \
+	VAND   V8.B16, LO2.B16, LO2.B16
+
+#define GALOIS_MUL(MUL_LO, MUL_HI, OUT1, OUT2, TMP1, TMP2) \
+	\ // Mul low part and mul high part
+	VTBL V0.B16, [MUL_LO.B16], OUT1.B16  \
+	VTBL V10.B16, [MUL_HI.B16], OUT2.B16 \
+	VTBL V1.B16, [MUL_LO.B16], TMP1.B16  \
+	VTBL V11.B16, [MUL_HI.B16], TMP2.B16 \
+	                                     \
+	\ // Combine results
+	VEOR OUT2.B16, OUT1.B16, OUT1.B16    \
+	VEOR TMP2.B16, TMP1.B16, OUT2.B16
+
+// func galMulNEON(low, high, in, out []byte)
+TEXT ·galMulNEON(SB), 7, $0
+	MOVD in_base+48(FP), R1
+	MOVD in_len+56(FP), R2   // length of message
+	MOVD out_base+72(FP), R5
+	SUBS $32, R2
+	BMI  complete
+
+	MOVD low+0(FP), R10   // R10: &low
+	MOVD high+24(FP), R11 // R11: &high
+	VLD1 (R10), [V6.B16]
+	VLD1 (R11), [V7.B16]
+
+	//
+	// Use an extra instruction below since `VDUP R3, V8.B16` generates assembler error
+	// WORD $0x4e010c68 // dup v8.16b, w3
+	//
+	MOVD $0x0f, R3
+	VMOV R3, V8.B[0]
+	VDUP V8.B[0], V8.B16
+
+loop:
+	// Main loop
+	LOAD(V0, V1, V10, V11)
+	GALOIS_MUL(V6, V7, V4, V5, V14, V15)
+
+	// Store result
+	VST1.P [V4.D2, V5.D2], 32(R5)
+
+	SUBS $32, R2
+	BPL  loop
+
+complete:
+	RET
+
+// func galMulXorNEON(low, high, in, out []byte)
+TEXT ·galMulXorNEON(SB), 7, $0
+	MOVD in_base+48(FP), R1
+	MOVD in_len+56(FP), R2   // length of message
+	MOVD out_base+72(FP), R5
+	SUBS $32, R2
+	BMI  completeXor
+
+	MOVD low+0(FP), R10   // R10: &low
+	MOVD high+24(FP), R11 // R11: &high
+	VLD1 (R10), [V6.B16]
+	VLD1 (R11), [V7.B16]
+
+	//
+	// Use an extra instruction below since `VDUP R3, V8.B16` generates assembler error
+	// WORD $0x4e010c68 // dup v8.16b, w3
+	//
+	MOVD $0x0f, R3
+	VMOV R3, V8.B[0]
+	VDUP V8.B[0], V8.B16
+
+loopXor:
+	// Main loop
+	VLD1 (R5), [V20.B16, V21.B16]
+
+	LOAD(V0, V1, V10, V11)
+	GALOIS_MUL(V6, V7, V4, V5, V14, V15)
+
+	VEOR V20.B16, V4.B16, V4.B16
+	VEOR V21.B16, V5.B16, V5.B16
+
+	// Store result
+	VST1.P [V4.D2, V5.D2], 32(R5)
+
+	SUBS $32, R2
+	BPL  loopXor
+
+completeXor:
+	RET
--- a/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.go
--- a/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.s
+++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.s
--- a/vendor/github.com/klauspost/reedsolomon/galois_gen_none.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_none.go
@ -0,0 +1,33 @@
+//go:build !amd64 || noasm || appengine || gccgo || nogen
+
+package reedsolomon
+
+const maxAvx2Inputs = 1
+const maxAvx2Outputs = 1
+const minAvx2Size = 1
+const avxSizeMask = 0
+const avx2CodeGen = false
+
+func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
+	panic("codegen not available")
+}
+
+func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) int {
+	panic("codegen not available")
+}
+
+func galMulSlicesGFNI(matrix []uint64, in, out [][]byte, start, stop int) int {
+	panic("codegen not available")
+}
+
+func galMulSlicesGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int {
+	panic("codegen not available")
+}
+
+func galMulSlicesAvxGFNI(matrix []uint64, in, out [][]byte, start, stop int) int {
+	panic("codegen not available")
+}
+
+func galMulSlicesAvxGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int {
+	panic("codegen not available")
+}
--- a/vendor/github.com/klauspost/reedsolomon/galois_gen_nopshufb_amd64.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_nopshufb_amd64.go
--- a/vendor/github.com/klauspost/reedsolomon/galois_gen_nopshufb_amd64.s
+++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_nopshufb_amd64.s
--- a/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_amd64.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_amd64.go
--- a/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_nopshufb_amd64.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_nopshufb_amd64.go
--- a/vendor/github.com/klauspost/reedsolomon/galois_noasm.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_noasm.go
@ -0,0 +1,91 @@
+//go:build (!amd64 || noasm || appengine || gccgo) && (!arm64 || noasm || appengine || gccgo || nopshufb) && (!ppc64le || noasm || appengine || gccgo || nopshufb)
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+
+package reedsolomon
+
+const pshufb = false
+
+func galMulSlice(c byte, in, out []byte, o *options) {
+	out = out[:len(in)]
+	if c == 1 {
+		copy(out, in)
+		return
+	}
+	mt := mulTable[c][:256]
+	for n, input := range in {
+		out[n] = mt[input]
+	}
+}
+
+func galMulSliceXor(c byte, in, out []byte, o *options) {
+	out = out[:len(in)]
+	if c == 1 {
+		sliceXor(in, out, o)
+		return
+	}
+	mt := mulTable[c][:256]
+	for n, input := range in {
+		out[n] ^= mt[input]
+	}
+}
+
+func init() {
+	defaultOptions.useAVX512 = false
+}
+
+// 4-way butterfly
+func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
+	ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 4-way butterfly
+func ifftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
+	ifftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 4-way butterfly
+func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
+	fftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 4-way butterfly
+func fftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
+	fftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 2-way butterfly forward
+func fftDIT2(x, y []byte, log_m ffe, o *options) {
+	// Reference version:
+	refMulAdd(x, y, log_m)
+	sliceXorGo(x, y, o)
+}
+
+// 2-way butterfly forward
+func fftDIT28(x, y []byte, log_m ffe8, o *options) {
+	// Reference version:
+	refMulAdd8(x, y, log_m)
+	sliceXorGo(x, y, o)
+}
+
+// 2-way butterfly inverse
+func ifftDIT2(x, y []byte, log_m ffe, o *options) {
+	// Reference version:
+	sliceXorGo(x, y, o)
+	refMulAdd(x, y, log_m)
+}
+
+// 2-way butterfly inverse
+func ifftDIT28(x, y []byte, log_m ffe8, o *options) {
+	// Reference version:
+	sliceXorGo(x, y, o)
+	refMulAdd8(x, y, log_m)
+}
+
+func mulgf16(x, y []byte, log_m ffe, o *options) {
+	refMul(x, y, log_m)
+}
+
+func mulgf8(x, y []byte, log_m ffe8, o *options) {
+	refMul8(x, y, log_m)
+}
--- a/vendor/github.com/klauspost/reedsolomon/galois_nopshufb_amd64.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_nopshufb_amd64.go
@ -0,0 +1,146 @@
+// Copyright 2015, Klaus Post, see LICENSE for details
+
+//go:build nopshufb && !noasm
+
+package reedsolomon
+
+// bigSwitchover is the size where 64 bytes are processed per loop.
+const bigSwitchover = 128
+
+const pshufb = false
+
+// simple slice xor
+func sliceXor(in, out []byte, o *options) {
+	if o.useSSE2 {
+		if len(in) >= bigSwitchover {
+			if o.useAVX2 {
+				avx2XorSlice_64(in, out)
+				done := (len(in) >> 6) << 6
+				in = in[done:]
+				out = out[done:]
+			} else {
+				sSE2XorSlice_64(in, out)
+				done := (len(in) >> 6) << 6
+				in = in[done:]
+				out = out[done:]
+			}
+		}
+		if len(in) >= 16 {
+			sSE2XorSlice(in, out)
+			done := (len(in) >> 4) << 4
+			in = in[done:]
+			out = out[done:]
+		}
+	} else {
+		sliceXorGo(in, out, o)
+		return
+	}
+	out = out[:len(in)]
+	for i := range in {
+		out[i] ^= in[i]
+	}
+}
+
+func galMulSlice(c byte, in, out []byte, o *options) {
+	out = out[:len(in)]
+	if c == 1 {
+		copy(out, in)
+		return
+	}
+	mt := mulTable[c][:256]
+	for len(in) >= 4 {
+		ii := (*[4]byte)(in)
+		oo := (*[4]byte)(out)
+		oo[0] = mt[ii[0]]
+		oo[1] = mt[ii[1]]
+		oo[2] = mt[ii[2]]
+		oo[3] = mt[ii[3]]
+		in = in[4:]
+		out = out[4:]
+	}
+	for n, input := range in {
+		out[n] = mt[input]
+	}
+}
+
+func galMulSliceXor(c byte, in, out []byte, o *options) {
+	out = out[:len(in)]
+	if c == 1 {
+		sliceXor(in, out, o)
+		return
+	}
+	mt := mulTable[c][:256]
+	for len(in) >= 4 {
+		ii := (*[4]byte)(in)
+		oo := (*[4]byte)(out)
+		oo[0] ^= mt[ii[0]]
+		oo[1] ^= mt[ii[1]]
+		oo[2] ^= mt[ii[2]]
+		oo[3] ^= mt[ii[3]]
+		in = in[4:]
+		out = out[4:]
+	}
+	for n, input := range in {
+		out[n] ^= mt[input]
+	}
+}
+
+func init() {
+	defaultOptions.useAVX512 = false
+}
+
+// 4-way butterfly
+func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
+	ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 4-way butterfly
+func ifftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
+	ifftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 4-way butterfly
+func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
+	fftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 4-way butterfly
+func fftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
+	fftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 2-way butterfly forward
+func fftDIT2(x, y []byte, log_m ffe, o *options) {
+	// Reference version:
+	refMulAdd(x, y, log_m)
+	sliceXor(x, y, o)
+}
+
+// 2-way butterfly forward
+func fftDIT28(x, y []byte, log_m ffe8, o *options) {
+	// Reference version:
+	refMulAdd8(x, y, log_m)
+	sliceXor(x, y, o)
+}
+
+// 2-way butterfly inverse
+func ifftDIT2(x, y []byte, log_m ffe, o *options) {
+	// Reference version:
+	sliceXor(x, y, o)
+	refMulAdd(x, y, log_m)
+}
+
+// 2-way butterfly inverse
+func ifftDIT28(x, y []byte, log_m ffe8, o *options) {
+	// Reference version:
+	sliceXor(x, y, o)
+	refMulAdd8(x, y, log_m)
+}
+
+func mulgf16(x, y []byte, log_m ffe, o *options) {
+	refMul(x, y, log_m)
+}
+
+func mulgf8(x, y []byte, log_m ffe8, o *options) {
+	refMul8(x, y, log_m)
+}
--- a/vendor/github.com/klauspost/reedsolomon/galois_notamd64.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_notamd64.go
@ -0,0 +1,13 @@
+//go:build !amd64 || noasm || appengine || gccgo || pshufb
+
+// Copyright 2020, Klaus Post, see LICENSE for details.
+
+package reedsolomon
+
+func (r *reedSolomon) codeSomeShardsAvx512(matrixRows, inputs, outputs [][]byte, byteCount int) {
+	panic("codeSomeShardsAvx512 should not be called if built without asm")
+}
+
+func (r *reedSolomon) codeSomeShardsAvx512P(matrixRows, inputs, outputs [][]byte, byteCount int) {
+	panic("codeSomeShardsAvx512P should not be called if built without asm")
+}
--- a/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go
@ -0,0 +1,146 @@
+//go:build !noasm && !appengine && !gccgo && !nopshufb
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+// Copyright 2018, Minio, Inc.
+
+package reedsolomon
+
+const pshufb = true
+
+//go:noescape
+func galMulPpc(low, high, in, out []byte)
+
+//go:noescape
+func galMulPpcXor(low, high, in, out []byte)
+
+// This is what the assembler routines do in blocks of 16 bytes:
+/*
+func galMulPpc(low, high, in, out []byte) {
+	for n, input := range in {
+		l := input & 0xf
+		h := input >> 4
+		out[n] = low[l] ^ high[h]
+	}
+}
+func galMulPpcXor(low, high, in, out []byte) {
+	for n, input := range in {
+		l := input & 0xf
+		h := input >> 4
+		out[n] ^= low[l] ^ high[h]
+	}
+}
+*/
+
+func galMulSlice(c byte, in, out []byte, o *options) {
+	if c == 1 {
+		copy(out, in)
+		return
+	}
+	done := (len(in) >> 4) << 4
+	if done > 0 {
+		galMulPpc(mulTableLow[c][:], mulTableHigh[c][:], in[:done], out)
+	}
+	remain := len(in) - done
+	if remain > 0 {
+		mt := mulTable[c][:256]
+		for i := done; i < len(in); i++ {
+			out[i] = mt[in[i]]
+		}
+	}
+}
+
+func galMulSliceXor(c byte, in, out []byte, o *options) {
+	if c == 1 {
+		sliceXor(in, out, o)
+		return
+	}
+	done := (len(in) >> 4) << 4
+	if done > 0 {
+		galMulPpcXor(mulTableLow[c][:], mulTableHigh[c][:], in[:done], out)
+	}
+	remain := len(in) - done
+	if remain > 0 {
+		mt := mulTable[c][:256]
+		for i := done; i < len(in); i++ {
+			out[i] ^= mt[in[i]]
+		}
+	}
+}
+
+// 4-way butterfly
+func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
+	ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 4-way butterfly
+func ifftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
+	ifftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 4-way butterfly
+func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
+	fftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 4-way butterfly
+func fftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
+	fftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
+}
+
+// 2-way butterfly forward
+func fftDIT2(x, y []byte, log_m ffe, o *options) {
+	// Reference version:
+	refMulAdd(x, y, log_m)
+	sliceXorGo(x, y, o)
+}
+
+// 2-way butterfly forward
+func fftDIT28(x, y []byte, log_m ffe8, o *options) {
+	// Reference version:
+	mulAdd8(x, y, log_m, o)
+	sliceXorGo(x, y, o)
+}
+
+// 2-way butterfly inverse
+func ifftDIT2(x, y []byte, log_m ffe, o *options) {
+	// Reference version:
+	sliceXorGo(x, y, o)
+	refMulAdd(x, y, log_m)
+}
+
+// 2-way butterfly inverse
+func ifftDIT28(x, y []byte, log_m ffe8, o *options) {
+	// Reference version:
+	sliceXorGo(x, y, o)
+	mulAdd8(x, y, log_m, o)
+}
+
+func mulgf16(x, y []byte, log_m ffe, o *options) {
+	refMul(x, y, log_m)
+}
+
+func mulAdd8(out, in []byte, log_m ffe8, o *options) {
+	t := &multiply256LUT8[log_m]
+	galMulPpcXor(t[:16], t[16:32], in, out)
+	done := (len(in) >> 4) << 4
+	in = in[done:]
+	if len(in) > 0 {
+		out = out[done:]
+		refMulAdd8(in, out, log_m)
+	}
+}
+
+func mulgf8(out, in []byte, log_m ffe8, o *options) {
+	var done int
+	t := &multiply256LUT8[log_m]
+	galMulPpc(t[:16], t[16:32], in, out)
+	done = (len(in) >> 4) << 4
+
+	remain := len(in) - done
+	if remain > 0 {
+		mt := mul8LUTs[log_m].Value[:]
+		for i := done; i < len(in); i++ {
+			out[i] ^= byte(mt[in[i]])
+		}
+	}
+}
--- a/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s
+++ b/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s
@ -0,0 +1,127 @@
+//+build !noasm
+//+build !appengine
+//+build !gccgo
+//+build !pshufb
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+// Copyright 2018, Minio, Inc.
+
+#include "textflag.h"
+
+#define LOW       R3
+#define HIGH      R4
+#define IN        R5
+#define LEN       R6
+#define OUT       R7
+#define CONSTANTS R8
+#define OFFSET    R9
+#define OFFSET1   R10
+#define OFFSET2   R11
+
+#define X6        VS34
+#define X6_       V2
+#define X7        VS35
+#define X7_       V3
+#define MSG       VS36
+#define MSG_      V4
+#define MSG_HI    VS37
+#define MSG_HI_   V5
+#define RESULT    VS38
+#define RESULT_   V6
+#define ROTATE    VS39
+#define ROTATE_   V7
+#define MASK      VS40
+#define MASK_     V8
+#define FLIP      VS41
+#define FLIP_     V9
+
+// func galMulPpc(low, high, in, out []byte)
+TEXT ·galMulPpc(SB), NOFRAME|NOSPLIT, $0-96
+	MOVD low+0(FP), LOW
+	MOVD high+24(FP), HIGH
+	MOVD in+48(FP), IN
+	MOVD in_len+56(FP), LEN
+	MOVD out+72(FP), OUT
+
+	MOVD $16, OFFSET1
+	MOVD $32, OFFSET2
+
+	MOVD   $·constants(SB), CONSTANTS
+	LXVD2X (CONSTANTS)(R0), ROTATE
+	LXVD2X (CONSTANTS)(OFFSET1), MASK
+	LXVD2X (CONSTANTS)(OFFSET2), FLIP
+
+	LXVD2X (LOW)(R0), X6
+	LXVD2X (HIGH)(R0), X7
+	VPERM  X6_, V31, FLIP_, X6_
+	VPERM  X7_, V31, FLIP_, X7_
+
+	MOVD $0, OFFSET
+
+loop:
+	LXVD2X (IN)(OFFSET), MSG
+
+	VSRB  MSG_, ROTATE_, MSG_HI_
+	VAND  MSG_, MASK_, MSG_
+	VPERM X6_, V31, MSG_, MSG_
+	VPERM X7_, V31, MSG_HI_, MSG_HI_
+
+	VXOR MSG_, MSG_HI_, MSG_
+
+	STXVD2X MSG, (OUT)(OFFSET)
+
+	ADD $16, OFFSET, OFFSET
+	CMP LEN, OFFSET
+	BGT loop
+	RET
+
+// func galMulPpcXorlow, high, in, out []byte)
+TEXT ·galMulPpcXor(SB), NOFRAME|NOSPLIT, $0-96
+	MOVD low+0(FP), LOW
+	MOVD high+24(FP), HIGH
+	MOVD in+48(FP), IN
+	MOVD in_len+56(FP), LEN
+	MOVD out+72(FP), OUT
+
+	MOVD $16, OFFSET1
+	MOVD $32, OFFSET2
+
+	MOVD   $·constants(SB), CONSTANTS
+	LXVD2X (CONSTANTS)(R0), ROTATE
+	LXVD2X (CONSTANTS)(OFFSET1), MASK
+	LXVD2X (CONSTANTS)(OFFSET2), FLIP
+
+	LXVD2X (LOW)(R0), X6
+	LXVD2X (HIGH)(R0), X7
+	VPERM  X6_, V31, FLIP_, X6_
+	VPERM  X7_, V31, FLIP_, X7_
+
+	MOVD $0, OFFSET
+
+loopXor:
+	LXVD2X (IN)(OFFSET), MSG
+	LXVD2X (OUT)(OFFSET), RESULT
+
+	VSRB  MSG_, ROTATE_, MSG_HI_
+	VAND  MSG_, MASK_, MSG_
+	VPERM X6_, V31, MSG_, MSG_
+	VPERM X7_, V31, MSG_HI_, MSG_HI_
+
+	VXOR MSG_, MSG_HI_, MSG_
+	VXOR MSG_, RESULT_, RESULT_
+
+	STXVD2X RESULT, (OUT)(OFFSET)
+
+	ADD $16, OFFSET, OFFSET
+	CMP LEN, OFFSET
+	BGT loopXor
+	RET
+
+DATA ·constants+0x0(SB)/8, $0x0404040404040404
+DATA ·constants+0x8(SB)/8, $0x0404040404040404
+DATA ·constants+0x10(SB)/8, $0x0f0f0f0f0f0f0f0f
+DATA ·constants+0x18(SB)/8, $0x0f0f0f0f0f0f0f0f
+DATA ·constants+0x20(SB)/8, $0x0706050403020100
+DATA ·constants+0x28(SB)/8, $0x0f0e0d0c0b0a0908
+
+GLOBL ·constants(SB), 8, $48
--- a/vendor/github.com/klauspost/reedsolomon/inversion_tree.go
+++ b/vendor/github.com/klauspost/reedsolomon/inversion_tree.go
@ -0,0 +1,164 @@
+/**
+ * A thread-safe tree which caches inverted matrices.
+ *
+ * Copyright 2016, Peter Collins
+ */
+
+package reedsolomon
+
+import (
+	"errors"
+	"sync"
+)
+
+// The tree uses a Reader-Writer mutex to make it thread-safe
+// when accessing cached matrices and inserting new ones.
+type inversionTree struct {
+	mutex sync.RWMutex
+	root  inversionNode
+}
+
+type inversionNode struct {
+	matrix   matrix
+	children []*inversionNode
+}
+
+// newInversionTree initializes a tree for storing inverted matrices.
+// Note that the root node is the identity matrix as it implies
+// there were no errors with the original data.
+func newInversionTree(dataShards, parityShards int) *inversionTree {
+	identity, _ := identityMatrix(dataShards)
+	return &inversionTree{
+		root: inversionNode{
+			matrix:   identity,
+			children: make([]*inversionNode, dataShards+parityShards),
+		},
+	}
+}
+
+// GetInvertedMatrix returns the cached inverted matrix or nil if it
+// is not found in the tree keyed on the indices of invalid rows.
+func (t *inversionTree) GetInvertedMatrix(invalidIndices []int) matrix {
+	if t == nil {
+		return nil
+	}
+	// Lock the tree for reading before accessing the tree.
+	t.mutex.RLock()
+	defer t.mutex.RUnlock()
+
+	// If no invalid indices were give we should return the root
+	// identity matrix.
+	if len(invalidIndices) == 0 {
+		return t.root.matrix
+	}
+
+	// Recursively search for the inverted matrix in the tree, passing in
+	// 0 as the parent index as we start at the root of the tree.
+	return t.root.getInvertedMatrix(invalidIndices, 0)
+}
+
+// errAlreadySet is returned if the root node matrix is overwritten
+var errAlreadySet = errors.New("the root node identity matrix is already set")
+
+// InsertInvertedMatrix inserts a new inverted matrix into the tree
+// keyed by the indices of invalid rows.  The total number of shards
+// is required for creating the proper length lists of child nodes for
+// each node.
+func (t *inversionTree) InsertInvertedMatrix(invalidIndices []int, matrix matrix, shards int) error {
+	if t == nil {
+		return nil
+	}
+	// If no invalid indices were given then we are done because the
+	// root node is already set with the identity matrix.
+	if len(invalidIndices) == 0 {
+		return errAlreadySet
+	}
+
+	if !matrix.IsSquare() {
+		return errNotSquare
+	}
+
+	// Lock the tree for writing and reading before accessing the tree.
+	t.mutex.Lock()
+	defer t.mutex.Unlock()
+
+	// Recursively create nodes for the inverted matrix in the tree until
+	// we reach the node to insert the matrix to.  We start by passing in
+	// 0 as the parent index as we start at the root of the tree.
+	t.root.insertInvertedMatrix(invalidIndices, matrix, shards, 0)
+
+	return nil
+}
+
+func (n *inversionNode) getInvertedMatrix(invalidIndices []int, parent int) matrix {
+	// Get the child node to search next from the list of children.  The
+	// list of children starts relative to the parent index passed in
+	// because the indices of invalid rows is sorted (by default).  As we
+	// search recursively, the first invalid index gets popped off the list,
+	// so when searching through the list of children, use that first invalid
+	// index to find the child node.
+	firstIndex := invalidIndices[0]
+	node := n.children[firstIndex-parent]
+
+	// If the child node doesn't exist in the list yet, fail fast by
+	// returning, so we can construct and insert the proper inverted matrix.
+	if node == nil {
+		return nil
+	}
+
+	// If there's more than one invalid index left in the list we should
+	// keep searching recursively.
+	if len(invalidIndices) > 1 {
+		// Search recursively on the child node by passing in the invalid indices
+		// with the first index popped off the front.  Also the parent index to
+		// pass down is the first index plus one.
+		return node.getInvertedMatrix(invalidIndices[1:], firstIndex+1)
+	}
+	// If there aren't any more invalid indices to search, we've found our
+	// node.  Return it, however keep in mind that the matrix could still be
+	// nil because intermediary nodes in the tree are created sometimes with
+	// their inversion matrices uninitialized.
+	return node.matrix
+}
+
+func (n *inversionNode) insertInvertedMatrix(invalidIndices []int, matrix matrix, shards, parent int) {
+	// As above, get the child node to search next from the list of children.
+	// The list of children starts relative to the parent index passed in
+	// because the indices of invalid rows is sorted (by default).  As we
+	// search recursively, the first invalid index gets popped off the list,
+	// so when searching through the list of children, use that first invalid
+	// index to find the child node.
+	firstIndex := invalidIndices[0]
+	node := n.children[firstIndex-parent]
+
+	// If the child node doesn't exist in the list yet, create a new
+	// node because we have the writer lock and add it to the list
+	// of children.
+	if node == nil {
+		// Make the length of the list of children equal to the number
+		// of shards minus the first invalid index because the list of
+		// invalid indices is sorted, so only this length of errors
+		// are possible in the tree.
+		node = &inversionNode{
+			children: make([]*inversionNode, shards-firstIndex),
+		}
+		// Insert the new node into the tree at the first index relative
+		// to the parent index that was given in this recursive call.
+		n.children[firstIndex-parent] = node
+	}
+
+	// If there's more than one invalid index left in the list we should
+	// keep searching recursively in order to find the node to add our
+	// matrix.
+	if len(invalidIndices) > 1 {
+		// As above, search recursively on the child node by passing in
+		// the invalid indices with the first index popped off the front.
+		// Also the total number of shards and parent index are passed down
+		// which is equal to the first index plus one.
+		node.insertInvertedMatrix(invalidIndices[1:], matrix, shards, firstIndex+1)
+	} else {
+		// If there aren't any more invalid indices to search, we've found our
+		// node.  Cache the inverted matrix in this node.
+		node.matrix = matrix
+	}
+}
--- a/vendor/github.com/klauspost/reedsolomon/leopard.go
+++ b/vendor/github.com/klauspost/reedsolomon/leopard.go
--- a/vendor/github.com/klauspost/reedsolomon/leopard8.go
+++ b/vendor/github.com/klauspost/reedsolomon/leopard8.go
--- a/vendor/github.com/klauspost/reedsolomon/matrix.go
+++ b/vendor/github.com/klauspost/reedsolomon/matrix.go
@ -0,0 +1,281 @@
+/**
+ * Matrix Algebra over an 8-bit Galois Field
+ *
+ * Copyright 2015, Klaus Post
+ * Copyright 2015, Backblaze, Inc.
+ */
+
+package reedsolomon
+
+import (
+	"errors"
+	"fmt"
+	"strconv"
+	"strings"
+)
+
+// byte[row][col]
+type matrix [][]byte
+
+// newMatrix returns a matrix of zeros.
+func newMatrix(rows, cols int) (matrix, error) {
+	if rows <= 0 {
+		return nil, errInvalidRowSize
+	}
+	if cols <= 0 {
+		return nil, errInvalidColSize
+	}
+
+	m := matrix(make([][]byte, rows))
+	for i := range m {
+		m[i] = make([]byte, cols)
+	}
+	return m, nil
+}
+
+// NewMatrixData initializes a matrix with the given row-major data.
+// Note that data is not copied from input.
+func newMatrixData(data [][]byte) (matrix, error) {
+	m := matrix(data)
+	err := m.Check()
+	if err != nil {
+		return nil, err
+	}
+	return m, nil
+}
+
+// IdentityMatrix returns an identity matrix of the given size.
+func identityMatrix(size int) (matrix, error) {
+	m, err := newMatrix(size, size)
+	if err != nil {
+		return nil, err
+	}
+	for i := range m {
+		m[i][i] = 1
+	}
+	return m, nil
+}
+
+// errInvalidRowSize will be returned if attempting to create a matrix with negative or zero row number.
+var errInvalidRowSize = errors.New("invalid row size")
+
+// errInvalidColSize will be returned if attempting to create a matrix with negative or zero column number.
+var errInvalidColSize = errors.New("invalid column size")
+
+// errColSizeMismatch is returned if the size of matrix columns mismatch.
+var errColSizeMismatch = errors.New("column size is not the same for all rows")
+
+func (m matrix) Check() error {
+	rows := len(m)
+	if rows == 0 {
+		return errInvalidRowSize
+	}
+	cols := len(m[0])
+	if cols == 0 {
+		return errInvalidColSize
+	}
+
+	for _, col := range m {
+		if len(col) != cols {
+			return errColSizeMismatch
+		}
+	}
+	return nil
+}
+
+// String returns a human-readable string of the matrix contents.
+//
+// Example: [[1, 2], [3, 4]]
+func (m matrix) String() string {
+	rowOut := make([]string, 0, len(m))
+	for _, row := range m {
+		colOut := make([]string, 0, len(row))
+		for _, col := range row {
+			colOut = append(colOut, strconv.Itoa(int(col)))
+		}
+		rowOut = append(rowOut, "["+strings.Join(colOut, ", ")+"]")
+	}
+	return "[" + strings.Join(rowOut, ", ") + "]"
+}
+
+// Multiply multiplies this matrix (the one on the left) by another
+// matrix (the one on the right) and returns a new matrix with the result.
+func (m matrix) Multiply(right matrix) (matrix, error) {
+	if len(m[0]) != len(right) {
+		return nil, fmt.Errorf("columns on left (%d) is different than rows on right (%d)", len(m[0]), len(right))
+	}
+	result, _ := newMatrix(len(m), len(right[0]))
+	for r, row := range result {
+		for c := range row {
+			var value byte
+			for i := range m[0] {
+				value ^= galMultiply(m[r][i], right[i][c])
+			}
+			result[r][c] = value
+		}
+	}
+	return result, nil
+}
+
+// Augment returns the concatenation of this matrix and the matrix on the right.
+func (m matrix) Augment(right matrix) (matrix, error) {
+	if len(m) != len(right) {
+		return nil, errMatrixSize
+	}
+
+	result, _ := newMatrix(len(m), len(m[0])+len(right[0]))
+	for r, row := range m {
+		for c := range row {
+			result[r][c] = m[r][c]
+		}
+		cols := len(m[0])
+		for c := range right[0] {
+			result[r][cols+c] = right[r][c]
+		}
+	}
+	return result, nil
+}
+
+// errMatrixSize is returned if matrix dimensions are doesn't match.
+var errMatrixSize = errors.New("matrix sizes do not match")
+
+func (m matrix) SameSize(n matrix) error {
+	if len(m) != len(n) {
+		return errMatrixSize
+	}
+	for i := range m {
+		if len(m[i]) != len(n[i]) {
+			return errMatrixSize
+		}
+	}
+	return nil
+}
+
+// SubMatrix returns a part of this matrix. Data is copied.
+func (m matrix) SubMatrix(rmin, cmin, rmax, cmax int) (matrix, error) {
+	result, err := newMatrix(rmax-rmin, cmax-cmin)
+	if err != nil {
+		return nil, err
+	}
+	// OPTME: If used heavily, use copy function to copy slice
+	for r := rmin; r < rmax; r++ {
+		for c := cmin; c < cmax; c++ {
+			result[r-rmin][c-cmin] = m[r][c]
+		}
+	}
+	return result, nil
+}
+
+// SwapRows Exchanges two rows in the matrix.
+func (m matrix) SwapRows(r1, r2 int) error {
+	if r1 < 0 || len(m) <= r1 || r2 < 0 || len(m) <= r2 {
+		return errInvalidRowSize
+	}
+	m[r2], m[r1] = m[r1], m[r2]
+	return nil
+}
+
+// IsSquare will return true if the matrix is square, otherwise false.
+func (m matrix) IsSquare() bool {
+	return len(m) == len(m[0])
+}
+
+// errSingular is returned if the matrix is singular and cannot be inversed
+var errSingular = errors.New("matrix is singular")
+
+// errNotSquare is returned if attempting to inverse a non-square matrix.
+var errNotSquare = errors.New("only square matrices can be inverted")
+
+// Invert returns the inverse of this matrix.
+// Returns ErrSingular when the matrix is singular and doesn't have an inverse.
+// The matrix must be square, otherwise ErrNotSquare is returned.
+func (m matrix) Invert() (matrix, error) {
+	if !m.IsSquare() {
+		return nil, errNotSquare
+	}
+
+	size := len(m)
+	work, _ := identityMatrix(size)
+	work, _ = m.Augment(work)
+
+	err := work.gaussianElimination()
+	if err != nil {
+		return nil, err
+	}
+
+	return work.SubMatrix(0, size, size, size*2)
+}
+
+func (m matrix) gaussianElimination() error {
+	rows := len(m)
+	columns := len(m[0])
+	// Clear out the part below the main diagonal and scale the main
+	// diagonal to be 1.
+	for r := 0; r < rows; r++ {
+		// If the element on the diagonal is 0, find a row below
+		// that has a non-zero and swap them.
+		if m[r][r] == 0 {
+			for rowBelow := r + 1; rowBelow < rows; rowBelow++ {
+				if m[rowBelow][r] != 0 {
+					err := m.SwapRows(r, rowBelow)
+					if err != nil {
+						return err
+					}
+					break
+				}
+			}
+		}
+		// If we couldn't find one, the matrix is singular.
+		if m[r][r] == 0 {
+			return errSingular
+		}
+		// Scale to 1.
+		if m[r][r] != 1 {
+			scale := galOneOver(m[r][r])
+			for c := 0; c < columns; c++ {
+				m[r][c] = galMultiply(m[r][c], scale)
+			}
+		}
+		// Make everything below the 1 be a 0 by subtracting
+		// a multiple of it.  (Subtraction and addition are
+		// both exclusive or in the Galois field.)
+		for rowBelow := r + 1; rowBelow < rows; rowBelow++ {
+			if m[rowBelow][r] != 0 {
+				scale := m[rowBelow][r]
+				for c := 0; c < columns; c++ {
+					m[rowBelow][c] ^= galMultiply(scale, m[r][c])
+				}
+			}
+		}
+	}
+
+	// Now clear the part above the main diagonal.
+	for d := 0; d < rows; d++ {
+		for rowAbove := 0; rowAbove < d; rowAbove++ {
+			if m[rowAbove][d] != 0 {
+				scale := m[rowAbove][d]
+				for c := 0; c < columns; c++ {
+					m[rowAbove][c] ^= galMultiply(scale, m[d][c])
+				}
+
+			}
+		}
+	}
+	return nil
+}
+
+// Create a Vandermonde matrix, which is guaranteed to have the
+// property that any subset of rows that forms a square matrix
+// is invertible.
+func vandermonde(rows, cols int) (matrix, error) {
+	result, err := newMatrix(rows, cols)
+	if err != nil {
+		return nil, err
+	}
+	for r, row := range result {
+		for c := range row {
+			result[r][c] = galExp(byte(r), c)
+		}
+	}
+	return result, nil
+}
--- a/vendor/github.com/klauspost/reedsolomon/options.go
+++ b/vendor/github.com/klauspost/reedsolomon/options.go
@ -0,0 +1,323 @@
+package reedsolomon
+
+import (
+	"runtime"
+	"strings"
+
+	"github.com/klauspost/cpuid/v2"
+)
+
+// Option allows to override processing parameters.
+type Option func(*options)
+
+type options struct {
+	maxGoroutines int
+	minSplitSize  int
+	shardSize     int
+	perRound      int
+
+	useAvxGNFI,
+	useAvx512GFNI,
+	useAVX512,
+	useAVX2,
+	useSSSE3,
+	useSSE2 bool
+
+	useJerasureMatrix    bool
+	usePAR1Matrix        bool
+	useCauchy            bool
+	fastOneParity        bool
+	inversionCache       bool
+	forcedInversionCache bool
+	customMatrix         [][]byte
+	withLeopard          leopardMode
+
+	// stream options
+	concReads  bool
+	concWrites bool
+	streamBS   int
+}
+
+var defaultOptions = options{
+	maxGoroutines:  384,
+	minSplitSize:   -1,
+	fastOneParity:  false,
+	inversionCache: true,
+
+	// Detect CPU capabilities.
+	useSSSE3:      cpuid.CPU.Supports(cpuid.SSSE3),
+	useSSE2:       cpuid.CPU.Supports(cpuid.SSE2),
+	useAVX2:       cpuid.CPU.Supports(cpuid.AVX2),
+	useAVX512:     cpuid.CPU.Supports(cpuid.AVX512F, cpuid.AVX512BW, cpuid.AVX512VL),
+	useAvx512GFNI: cpuid.CPU.Supports(cpuid.AVX512F, cpuid.GFNI, cpuid.AVX512DQ),
+	useAvxGNFI:    cpuid.CPU.Supports(cpuid.AVX, cpuid.GFNI),
+}
+
+// leopardMode controls the use of leopard GF in encoding and decoding.
+type leopardMode int
+
+const (
+	// leopardAsNeeded only switches to leopard 16-bit when there are more than
+	// 256 shards.
+	leopardAsNeeded leopardMode = iota
+	// leopardGF16 uses leopard in 16-bit mode for all shard counts.
+	leopardGF16
+	// leopardAlways uses 8-bit leopard for shards less than or equal to 256,
+	// 16-bit leopard otherwise.
+	leopardAlways
+)
+
+func init() {
+	if runtime.GOMAXPROCS(0) <= 1 {
+		defaultOptions.maxGoroutines = 1
+	}
+}
+
+// WithMaxGoroutines is the maximum number of goroutines number for encoding & decoding.
+// Jobs will be split into this many parts, unless each goroutine would have to process
+// less than minSplitSize bytes (set with WithMinSplitSize).
+// For the best speed, keep this well above the GOMAXPROCS number for more fine grained
+// scheduling.
+// If n <= 0, it is ignored.
+func WithMaxGoroutines(n int) Option {
+	return func(o *options) {
+		if n > 0 {
+			o.maxGoroutines = n
+		}
+	}
+}
+
+// WithAutoGoroutines will adjust the number of goroutines for optimal speed with a
+// specific shard size.
+// Send in the shard size you expect to send. Other shard sizes will work, but may not
+// run at the optimal speed.
+// Overwrites WithMaxGoroutines.
+// If shardSize <= 0, it is ignored.
+func WithAutoGoroutines(shardSize int) Option {
+	return func(o *options) {
+		o.shardSize = shardSize
+	}
+}
+
+// WithMinSplitSize is the minimum encoding size in bytes per goroutine.
+// By default this parameter is determined by CPU cache characteristics.
+// See WithMaxGoroutines on how jobs are split.
+// If n <= 0, it is ignored.
+func WithMinSplitSize(n int) Option {
+	return func(o *options) {
+		if n > 0 {
+			o.minSplitSize = n
+		}
+	}
+}
+
+// WithConcurrentStreams will enable concurrent reads and writes on the streams.
+// Default: Disabled, meaning only one stream will be read/written at the time.
+// Ignored if not used on a stream input.
+func WithConcurrentStreams(enabled bool) Option {
+	return func(o *options) {
+		o.concReads, o.concWrites = enabled, enabled
+	}
+}
+
+// WithConcurrentStreamReads will enable concurrent reads from the input streams.
+// Default: Disabled, meaning only one stream will be read at the time.
+// Ignored if not used on a stream input.
+func WithConcurrentStreamReads(enabled bool) Option {
+	return func(o *options) {
+		o.concReads = enabled
+	}
+}
+
+// WithConcurrentStreamWrites will enable concurrent writes to the the output streams.
+// Default: Disabled, meaning only one stream will be written at the time.
+// Ignored if not used on a stream input.
+func WithConcurrentStreamWrites(enabled bool) Option {
+	return func(o *options) {
+		o.concWrites = enabled
+	}
+}
+
+// WithInversionCache allows to control the inversion cache.
+// This will cache reconstruction matrices so they can be reused.
+// Enabled by default, or <= 64 shards for Leopard encoding.
+func WithInversionCache(enabled bool) Option {
+	return func(o *options) {
+		o.inversionCache = enabled
+		o.forcedInversionCache = true
+	}
+}
+
+// WithStreamBlockSize allows to set a custom block size per round of reads/writes.
+// If not set, any shard size set with WithAutoGoroutines will be used.
+// If WithAutoGoroutines is also unset, 4MB will be used.
+// Ignored if not used on stream.
+func WithStreamBlockSize(n int) Option {
+	return func(o *options) {
+		o.streamBS = n
+	}
+}
+
+// WithSSSE3 allows to enable/disable SSSE3 instructions.
+// If not set, SSSE3 will be turned on or off automatically based on CPU ID information.
+func WithSSSE3(enabled bool) Option {
+	return func(o *options) {
+		o.useSSSE3 = enabled
+	}
+}
+
+// WithAVX2 allows to enable/disable AVX2 instructions.
+// If not set, AVX will be turned on or off automatically based on CPU ID information.
+// This will also disable AVX GFNI instructions.
+func WithAVX2(enabled bool) Option {
+	return func(o *options) {
+		o.useAVX2 = enabled
+		if o.useAvxGNFI {
+			o.useAvxGNFI = enabled
+		}
+	}
+}
+
+// WithSSE2 allows to enable/disable SSE2 instructions.
+// If not set, SSE2 will be turned on or off automatically based on CPU ID information.
+func WithSSE2(enabled bool) Option {
+	return func(o *options) {
+		o.useSSE2 = enabled
+	}
+}
+
+// WithAVX512 allows to enable/disable AVX512 (and GFNI) instructions.
+func WithAVX512(enabled bool) Option {
+	return func(o *options) {
+		o.useAVX512 = enabled
+		o.useAvx512GFNI = enabled
+	}
+}
+
+// WithGFNI allows to enable/disable AVX512+GFNI instructions.
+// If not set, GFNI will be turned on or off automatically based on CPU ID information.
+func WithGFNI(enabled bool) Option {
+	return func(o *options) {
+		o.useAvx512GFNI = enabled
+	}
+}
+
+// WithAVXGFNI allows to enable/disable GFNI with AVX instructions.
+// If not set, GFNI will be turned on or off automatically based on CPU ID information.
+func WithAVXGFNI(enabled bool) Option {
+	return func(o *options) {
+		o.useAvxGNFI = enabled
+	}
+}
+
+// WithJerasureMatrix causes the encoder to build the Reed-Solomon-Vandermonde
+// matrix in the same way as done by the Jerasure library.
+// The first row and column of the coding matrix only contains 1's in this method
+// so the first parity chunk is always equal to XOR of all data chunks.
+func WithJerasureMatrix() Option {
+	return func(o *options) {
+		o.useJerasureMatrix = true
+		o.usePAR1Matrix = false
+		o.useCauchy = false
+	}
+}
+
+// WithPAR1Matrix causes the encoder to build the matrix how PARv1
+// does. Note that the method they use is buggy, and may lead to cases
+// where recovery is impossible, even if there are enough parity
+// shards.
+func WithPAR1Matrix() Option {
+	return func(o *options) {
+		o.useJerasureMatrix = false
+		o.usePAR1Matrix = true
+		o.useCauchy = false
+	}
+}
+
+// WithCauchyMatrix will make the encoder build a Cauchy style matrix.
+// The output of this is not compatible with the standard output.
+// A Cauchy matrix is faster to generate. This does not affect data throughput,
+// but will result in slightly faster start-up time.
+func WithCauchyMatrix() Option {
+	return func(o *options) {
+		o.useJerasureMatrix = false
+		o.usePAR1Matrix = false
+		o.useCauchy = true
+	}
+}
+
+// WithFastOneParityMatrix will switch the matrix to a simple xor
+// if there is only one parity shard.
+// The PAR1 matrix already has this property so it has little effect there.
+func WithFastOneParityMatrix() Option {
+	return func(o *options) {
+		o.fastOneParity = true
+	}
+}
+
+// WithCustomMatrix causes the encoder to use the manually specified matrix.
+// customMatrix represents only the parity chunks.
+// customMatrix must have at least ParityShards rows and DataShards columns.
+// It can be used for interoperability with libraries which generate
+// the matrix differently or to implement more complex coding schemes like LRC
+// (locally reconstructible codes).
+func WithCustomMatrix(customMatrix [][]byte) Option {
+	return func(o *options) {
+		o.customMatrix = customMatrix
+	}
+}
+
+// WithLeopardGF16 will always use leopard GF16 for encoding,
+// even when there is less than 256 shards.
+// This will likely improve reconstruction time for some setups.
+// This is not compatible with Leopard output for <= 256 shards.
+// Note that Leopard places certain restrictions on use see other documentation.
+func WithLeopardGF16(enabled bool) Option {
+	return func(o *options) {
+		if enabled {
+			o.withLeopard = leopardGF16
+		} else {
+			o.withLeopard = leopardAsNeeded
+		}
+	}
+}
+
+// WithLeopardGF will use leopard GF for encoding, even when there are fewer than
+// 256 shards.
+// This will likely improve reconstruction time for some setups.
+// Note that Leopard places certain restrictions on use see other documentation.
+func WithLeopardGF(enabled bool) Option {
+	return func(o *options) {
+		if enabled {
+			o.withLeopard = leopardAlways
+		} else {
+			o.withLeopard = leopardAsNeeded
+		}
+	}
+}
+
+func (o *options) cpuOptions() string {
+	var res []string
+	if o.useSSE2 {
+		res = append(res, "SSE2")
+	}
+	if o.useAVX2 {
+		res = append(res, "AVX2")
+	}
+	if o.useSSSE3 {
+		res = append(res, "SSSE3")
+	}
+	if o.useAVX512 {
+		res = append(res, "AVX512")
+	}
+	if o.useAvx512GFNI {
+		res = append(res, "AVX512+GFNI")
+	}
+	if o.useAvxGNFI {
+		res = append(res, "AVX+GFNI")
+	}
+	if len(res) == 0 {
+		return "pure Go"
+	}
+	return strings.Join(res, ",")
+}
--- a/vendor/github.com/klauspost/reedsolomon/reedsolomon.go
+++ b/vendor/github.com/klauspost/reedsolomon/reedsolomon.go
--- a/vendor/github.com/klauspost/reedsolomon/streaming.go
+++ b/vendor/github.com/klauspost/reedsolomon/streaming.go
@ -0,0 +1,614 @@
+/**
+ * Reed-Solomon Coding over 8-bit values.
+ *
+ * Copyright 2015, Klaus Post
+ * Copyright 2015, Backblaze, Inc.
+ */
+
+package reedsolomon
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"sync"
+)
+
+// StreamEncoder is an interface to encode Reed-Salomon parity sets for your data.
+// It provides a fully streaming interface, and processes data in blocks of up to 4MB.
+//
+// For small shard sizes, 10MB and below, it is recommended to use the in-memory interface,
+// since the streaming interface has a start up overhead.
+//
+// For all operations, no readers and writers should not assume any order/size of
+// individual reads/writes.
+//
+// For usage examples, see "stream-encoder.go" and "streamdecoder.go" in the examples
+// folder.
+type StreamEncoder interface {
+	// Encode parity shards for a set of data shards.
+	//
+	// Input is 'shards' containing readers for data shards followed by parity shards
+	// io.Writer.
+	//
+	// The number of shards must match the number given to NewStream().
+	//
+	// Each reader must supply the same number of bytes.
+	//
+	// The parity shards will be written to the writer.
+	// The number of bytes written will match the input size.
+	//
+	// If a data stream returns an error, a StreamReadError type error
+	// will be returned. If a parity writer returns an error, a
+	// StreamWriteError will be returned.
+	Encode(data []io.Reader, parity []io.Writer) error
+
+	// Verify returns true if the parity shards contain correct data.
+	//
+	// The number of shards must match the number total data+parity shards
+	// given to NewStream().
+	//
+	// Each reader must supply the same number of bytes.
+	// If a shard stream returns an error, a StreamReadError type error
+	// will be returned.
+	Verify(shards []io.Reader) (bool, error)
+
+	// Reconstruct will recreate the missing shards if possible.
+	//
+	// Given a list of valid shards (to read) and invalid shards (to write)
+	//
+	// You indicate that a shard is missing by setting it to nil in the 'valid'
+	// slice and at the same time setting a non-nil writer in "fill".
+	// An index cannot contain both non-nil 'valid' and 'fill' entry.
+	// If both are provided 'ErrReconstructMismatch' is returned.
+	//
+	// If there are too few shards to reconstruct the missing
+	// ones, ErrTooFewShards will be returned.
+	//
+	// The reconstructed shard set is complete, but integrity is not verified.
+	// Use the Verify function to check if data set is ok.
+	Reconstruct(valid []io.Reader, fill []io.Writer) error
+
+	// Split a an input stream into the number of shards given to the encoder.
+	//
+	// The data will be split into equally sized shards.
+	// If the data size isn't dividable by the number of shards,
+	// the last shard will contain extra zeros.
+	//
+	// You must supply the total size of your input.
+	// 'ErrShortData' will be returned if it is unable to retrieve the
+	// number of bytes indicated.
+	Split(data io.Reader, dst []io.Writer, size int64) (err error)
+
+	// Join the shards and write the data segment to dst.
+	//
+	// Only the data shards are considered.
+	//
+	// You must supply the exact output size you want.
+	// If there are to few shards given, ErrTooFewShards will be returned.
+	// If the total data size is less than outSize, ErrShortData will be returned.
+	Join(dst io.Writer, shards []io.Reader, outSize int64) error
+}
+
+// StreamReadError is returned when a read error is encountered
+// that relates to a supplied stream.
+// This will allow you to find out which reader has failed.
+type StreamReadError struct {
+	Err    error // The error
+	Stream int   // The stream number on which the error occurred
+}
+
+// Error returns the error as a string
+func (s StreamReadError) Error() string {
+	return fmt.Sprintf("error reading stream %d: %s", s.Stream, s.Err)
+}
+
+// String returns the error as a string
+func (s StreamReadError) String() string {
+	return s.Error()
+}
+
+// StreamWriteError is returned when a write error is encountered
+// that relates to a supplied stream. This will allow you to
+// find out which reader has failed.
+type StreamWriteError struct {
+	Err    error // The error
+	Stream int   // The stream number on which the error occurred
+}
+
+// Error returns the error as a string
+func (s StreamWriteError) Error() string {
+	return fmt.Sprintf("error writing stream %d: %s", s.Stream, s.Err)
+}
+
+// String returns the error as a string
+func (s StreamWriteError) String() string {
+	return s.Error()
+}
+
+// rsStream contains a matrix for a specific
+// distribution of datashards and parity shards.
+// Construct if using NewStream()
+type rsStream struct {
+	r *reedSolomon
+	o options
+
+	// Shard reader
+	readShards func(dst [][]byte, in []io.Reader) error
+	// Shard writer
+	writeShards func(out []io.Writer, in [][]byte) error
+
+	blockPool sync.Pool
+}
+
+// NewStream creates a new encoder and initializes it to
+// the number of data shards and parity shards that
+// you want to use. You can reuse this encoder.
+// Note that the maximum number of data shards is 256.
+func NewStream(dataShards, parityShards int, o ...Option) (StreamEncoder, error) {
+	if dataShards+parityShards > 256 {
+		return nil, ErrMaxShardNum
+	}
+
+	r := rsStream{o: defaultOptions}
+	for _, opt := range o {
+		opt(&r.o)
+	}
+	// Override block size if shard size is set.
+	if r.o.streamBS == 0 && r.o.shardSize > 0 {
+		r.o.streamBS = r.o.shardSize
+	}
+	if r.o.streamBS <= 0 {
+		r.o.streamBS = 4 << 20
+	}
+	if r.o.shardSize == 0 && r.o.maxGoroutines == defaultOptions.maxGoroutines {
+		o = append(o, WithAutoGoroutines(r.o.streamBS))
+	}
+
+	enc, err := New(dataShards, parityShards, o...)
+	if err != nil {
+		return nil, err
+	}
+	r.r = enc.(*reedSolomon)
+
+	r.blockPool.New = func() interface{} {
+		return AllocAligned(dataShards+parityShards, r.o.streamBS)
+	}
+	r.readShards = readShards
+	r.writeShards = writeShards
+	if r.o.concReads {
+		r.readShards = cReadShards
+	}
+	if r.o.concWrites {
+		r.writeShards = cWriteShards
+	}
+
+	return &r, err
+}
+
+// NewStreamC creates a new encoder and initializes it to
+// the number of data shards and parity shards given.
+//
+// This functions as 'NewStream', but allows you to enable CONCURRENT reads and writes.
+func NewStreamC(dataShards, parityShards int, conReads, conWrites bool, o ...Option) (StreamEncoder, error) {
+	return NewStream(dataShards, parityShards, append(o, WithConcurrentStreamReads(conReads), WithConcurrentStreamWrites(conWrites))...)
+}
+
+func (r *rsStream) createSlice() [][]byte {
+	out := r.blockPool.Get().([][]byte)
+	for i := range out {
+		out[i] = out[i][:r.o.streamBS]
+	}
+	return out
+}
+
+// Encodes parity shards for a set of data shards.
+//
+// Input is 'shards' containing readers for data shards followed by parity shards
+// io.Writer.
+//
+// The number of shards must match the number given to NewStream().
+//
+// Each reader must supply the same number of bytes.
+//
+// The parity shards will be written to the writer.
+// The number of bytes written will match the input size.
+//
+// If a data stream returns an error, a StreamReadError type error
+// will be returned. If a parity writer returns an error, a
+// StreamWriteError will be returned.
+func (r *rsStream) Encode(data []io.Reader, parity []io.Writer) error {
+	if len(data) != r.r.dataShards {
+		return ErrTooFewShards
+	}
+
+	if len(parity) != r.r.parityShards {
+		return ErrTooFewShards
+	}
+
+	all := r.createSlice()
+	defer r.blockPool.Put(all)
+	in := all[:r.r.dataShards]
+	out := all[r.r.dataShards:]
+	read := 0
+
+	for {
+		err := r.readShards(in, data)
+		switch err {
+		case nil:
+		case io.EOF:
+			if read == 0 {
+				return ErrShardNoData
+			}
+			return nil
+		default:
+			return err
+		}
+		out = trimShards(out, shardSize(in))
+		read += shardSize(in)
+		err = r.r.Encode(all)
+		if err != nil {
+			return err
+		}
+		err = r.writeShards(parity, out)
+		if err != nil {
+			return err
+		}
+	}
+}
+
+// Trim the shards so they are all the same size
+func trimShards(in [][]byte, size int) [][]byte {
+	for i := range in {
+		if len(in[i]) != 0 {
+			in[i] = in[i][0:size]
+		}
+		if len(in[i]) < size {
+			in[i] = in[i][:0]
+		}
+	}
+	return in
+}
+
+func readShards(dst [][]byte, in []io.Reader) error {
+	if len(in) != len(dst) {
+		panic("internal error: in and dst size do not match")
+	}
+	size := -1
+	for i := range in {
+		if in[i] == nil {
+			dst[i] = dst[i][:0]
+			continue
+		}
+		n, err := io.ReadFull(in[i], dst[i])
+		// The error is EOF only if no bytes were read.
+		// If an EOF happens after reading some but not all the bytes,
+		// ReadFull returns ErrUnexpectedEOF.
+		switch err {
+		case io.ErrUnexpectedEOF, io.EOF:
+			if size < 0 {
+				size = n
+			} else if n != size {
+				// Shard sizes must match.
+				return ErrShardSize
+			}
+			dst[i] = dst[i][0:n]
+		case nil:
+			continue
+		default:
+			return StreamReadError{Err: err, Stream: i}
+		}
+	}
+	if size == 0 {
+		return io.EOF
+	}
+	return nil
+}
+
+func writeShards(out []io.Writer, in [][]byte) error {
+	if len(out) != len(in) {
+		panic("internal error: in and out size do not match")
+	}
+	for i := range in {
+		if out[i] == nil {
+			continue
+		}
+		n, err := out[i].Write(in[i])
+		if err != nil {
+			return StreamWriteError{Err: err, Stream: i}
+		}
+		//
+		if n != len(in[i]) {
+			return StreamWriteError{Err: io.ErrShortWrite, Stream: i}
+		}
+	}
+	return nil
+}
+
+type readResult struct {
+	n    int
+	size int
+	err  error
+}
+
+// cReadShards reads shards concurrently
+func cReadShards(dst [][]byte, in []io.Reader) error {
+	if len(in) != len(dst) {
+		panic("internal error: in and dst size do not match")
+	}
+	var wg sync.WaitGroup
+	wg.Add(len(in))
+	res := make(chan readResult, len(in))
+	for i := range in {
+		if in[i] == nil {
+			dst[i] = dst[i][:0]
+			wg.Done()
+			continue
+		}
+		go func(i int) {
+			defer wg.Done()
+			n, err := io.ReadFull(in[i], dst[i])
+			// The error is EOF only if no bytes were read.
+			// If an EOF happens after reading some but not all the bytes,
+			// ReadFull returns ErrUnexpectedEOF.
+			res <- readResult{size: n, err: err, n: i}
+
+		}(i)
+	}
+	wg.Wait()
+	close(res)
+	size := -1
+	for r := range res {
+		switch r.err {
+		case io.ErrUnexpectedEOF, io.EOF:
+			if size < 0 {
+				size = r.size
+			} else if r.size != size {
+				// Shard sizes must match.
+				return ErrShardSize
+			}
+			dst[r.n] = dst[r.n][0:r.size]
+		case nil:
+		default:
+			return StreamReadError{Err: r.err, Stream: r.n}
+		}
+	}
+	if size == 0 {
+		return io.EOF
+	}
+	return nil
+}
+
+// cWriteShards writes shards concurrently
+func cWriteShards(out []io.Writer, in [][]byte) error {
+	if len(out) != len(in) {
+		panic("internal error: in and out size do not match")
+	}
+	var errs = make(chan error, len(out))
+	var wg sync.WaitGroup
+	wg.Add(len(out))
+	for i := range in {
+		go func(i int) {
+			defer wg.Done()
+			if out[i] == nil {
+				errs <- nil
+				return
+			}
+			n, err := out[i].Write(in[i])
+			if err != nil {
+				errs <- StreamWriteError{Err: err, Stream: i}
+				return
+			}
+			if n != len(in[i]) {
+				errs <- StreamWriteError{Err: io.ErrShortWrite, Stream: i}
+			}
+		}(i)
+	}
+	wg.Wait()
+	close(errs)
+	for err := range errs {
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// Verify returns true if the parity shards contain correct data.
+//
+// The number of shards must match the number total data+parity shards
+// given to NewStream().
+//
+// Each reader must supply the same number of bytes.
+// If a shard stream returns an error, a StreamReadError type error
+// will be returned.
+func (r *rsStream) Verify(shards []io.Reader) (bool, error) {
+	if len(shards) != r.r.totalShards {
+		return false, ErrTooFewShards
+	}
+
+	read := 0
+	all := r.createSlice()
+	defer r.blockPool.Put(all)
+	for {
+		err := r.readShards(all, shards)
+		if err == io.EOF {
+			if read == 0 {
+				return false, ErrShardNoData
+			}
+			return true, nil
+		}
+		if err != nil {
+			return false, err
+		}
+		read += shardSize(all)
+		ok, err := r.r.Verify(all)
+		if !ok || err != nil {
+			return ok, err
+		}
+	}
+}
+
+// ErrReconstructMismatch is returned by the StreamEncoder, if you supply
+// "valid" and "fill" streams on the same index.
+// Therefore it is impossible to see if you consider the shard valid
+// or would like to have it reconstructed.
+var ErrReconstructMismatch = errors.New("valid shards and fill shards are mutually exclusive")
+
+// Reconstruct will recreate the missing shards if possible.
+//
+// Given a list of valid shards (to read) and invalid shards (to write)
+//
+// You indicate that a shard is missing by setting it to nil in the 'valid'
+// slice and at the same time setting a non-nil writer in "fill".
+// An index cannot contain both non-nil 'valid' and 'fill' entry.
+//
+// If there are too few shards to reconstruct the missing
+// ones, ErrTooFewShards will be returned.
+//
+// The reconstructed shard set is complete when explicitly asked for all missing shards.
+// However its integrity is not automatically verified.
+// Use the Verify function to check in case the data set is complete.
+func (r *rsStream) Reconstruct(valid []io.Reader, fill []io.Writer) error {
+	if len(valid) != r.r.totalShards {
+		return ErrTooFewShards
+	}
+	if len(fill) != r.r.totalShards {
+		return ErrTooFewShards
+	}
+
+	all := r.createSlice()
+	defer r.blockPool.Put(all)
+	reconDataOnly := true
+	for i := range valid {
+		if valid[i] != nil && fill[i] != nil {
+			return ErrReconstructMismatch
+		}
+		if i >= r.r.dataShards && fill[i] != nil {
+			reconDataOnly = false
+		}
+	}
+
+	read := 0
+	for {
+		err := r.readShards(all, valid)
+		if err == io.EOF {
+			if read == 0 {
+				return ErrShardNoData
+			}
+			return nil
+		}
+		if err != nil {
+			return err
+		}
+		read += shardSize(all)
+		all = trimShards(all, shardSize(all))
+
+		if reconDataOnly {
+			err = r.r.ReconstructData(all) // just reconstruct missing data shards
+		} else {
+			err = r.r.Reconstruct(all) //  reconstruct all missing shards
+		}
+		if err != nil {
+			return err
+		}
+		err = r.writeShards(fill, all)
+		if err != nil {
+			return err
+		}
+	}
+}
+
+// Join the shards and write the data segment to dst.
+//
+// Only the data shards are considered.
+//
+// You must supply the exact output size you want.
+// If there are to few shards given, ErrTooFewShards will be returned.
+// If the total data size is less than outSize, ErrShortData will be returned.
+func (r *rsStream) Join(dst io.Writer, shards []io.Reader, outSize int64) error {
+	// Do we have enough shards?
+	if len(shards) < r.r.dataShards {
+		return ErrTooFewShards
+	}
+
+	// Trim off parity shards if any
+	shards = shards[:r.r.dataShards]
+	for i := range shards {
+		if shards[i] == nil {
+			return StreamReadError{Err: ErrShardNoData, Stream: i}
+		}
+	}
+	// Join all shards
+	src := io.MultiReader(shards...)
+
+	// Copy data to dst
+	n, err := io.CopyN(dst, src, outSize)
+	if err == io.EOF {
+		return ErrShortData
+	}
+	if err != nil {
+		return err
+	}
+	if n != outSize {
+		return ErrShortData
+	}
+	return nil
+}
+
+// Split a an input stream into the number of shards given to the encoder.
+//
+// The data will be split into equally sized shards.
+// If the data size isn't dividable by the number of shards,
+// the last shard will contain extra zeros.
+//
+// You must supply the total size of your input.
+// 'ErrShortData' will be returned if it is unable to retrieve the
+// number of bytes indicated.
+func (r *rsStream) Split(data io.Reader, dst []io.Writer, size int64) error {
+	if size == 0 {
+		return ErrShortData
+	}
+	if len(dst) != r.r.dataShards {
+		return ErrInvShardNum
+	}
+
+	for i := range dst {
+		if dst[i] == nil {
+			return StreamWriteError{Err: ErrShardNoData, Stream: i}
+		}
+	}
+
+	// Calculate number of bytes per shard.
+	perShard := (size + int64(r.r.dataShards) - 1) / int64(r.r.dataShards)
+
+	// Pad data to r.Shards*perShard.
+	paddingSize := (int64(r.r.totalShards) * perShard) - size
+	data = io.MultiReader(data, io.LimitReader(zeroPaddingReader{}, paddingSize))
+
+	// Split into equal-length shards and copy.
+	for i := range dst {
+		n, err := io.CopyN(dst[i], data, perShard)
+		if err != io.EOF && err != nil {
+			return err
+		}
+		if n != perShard {
+			return ErrShortData
+		}
+	}
+
+	return nil
+}
+
+type zeroPaddingReader struct{}
+
+var _ io.Reader = &zeroPaddingReader{}
+
+func (t zeroPaddingReader) Read(p []byte) (n int, err error) {
+	n = len(p)
+	for i := 0; i < n; i++ {
+		p[i] = 0
+	}
+	return n, nil
+}
--- a/vendor/github.com/klauspost/reedsolomon/unsafe.go
+++ b/vendor/github.com/klauspost/reedsolomon/unsafe.go
@ -0,0 +1,41 @@
+//go:build !noasm && !nounsafe && !gccgo && !appengine
+
+/**
+ * Reed-Solomon Coding over 8-bit values.
+ *
+ * Copyright 2023, Klaus Post
+ */
+
+package reedsolomon
+
+import (
+	"unsafe"
+)
+
+// AllocAligned allocates 'shards' slices, with 'each' bytes.
+// Each slice will start on a 64 byte aligned boundary.
+func AllocAligned(shards, each int) [][]byte {
+	if false {
+		res := make([][]byte, shards)
+		for i := range res {
+			res[i] = make([]byte, each)
+		}
+		return res
+	}
+	const (
+		alignEach  = 64
+		alignStart = 64
+	)
+	eachAligned := ((each + alignEach - 1) / alignEach) * alignEach
+	total := make([]byte, eachAligned*shards+63)
+	align := uint(uintptr(unsafe.Pointer(&total[0]))) & (alignStart - 1)
+	if align > 0 {
+		total = total[alignStart-align:]
+	}
+	res := make([][]byte, shards)
+	for i := range res {
+		res[i] = total[:each:eachAligned]
+		total = total[eachAligned:]
+	}
+	return res
+}
--- a/vendor/github.com/klauspost/reedsolomon/unsafe_disabled.go
+++ b/vendor/github.com/klauspost/reedsolomon/unsafe_disabled.go
@ -0,0 +1,23 @@
+//go:build noasm || nounsafe || gccgo || appengine
+
+/**
+ * Reed-Solomon Coding over 8-bit values.
+ *
+ * Copyright 2023, Klaus Post
+ */
+
+package reedsolomon
+
+// AllocAligned allocates 'shards' slices, with 'each' bytes.
+// Each slice will start on a 64 byte aligned boundary.
+func AllocAligned(shards, each int) [][]byte {
+	eachAligned := ((each + 63) / 64) * 64
+	total := make([]byte, eachAligned*shards+63)
+	// We cannot do initial align without "unsafe", just use native alignment.
+	res := make([][]byte, shards)
+	for i := range res {
+		res[i] = total[:each:eachAligned]
+		total = total[eachAligned:]
+	}
+	return res
+}
--- a/vendor/github.com/klauspost/reedsolomon/xor_arm64.go
+++ b/vendor/github.com/klauspost/reedsolomon/xor_arm64.go
@ -0,0 +1,19 @@
+//go:build !noasm && !appengine && !gccgo
+
+package reedsolomon
+
+//go:noescape
+func xorSliceNEON(in, out []byte)
+
+// simple slice xor
+func sliceXor(in, out []byte, o *options) {
+	xorSliceNEON(in, out)
+	done := (len(in) >> 5) << 5
+
+	remain := len(in) - done
+	if remain > 0 {
+		for i := done; i < len(in); i++ {
+			out[i] ^= in[i]
+		}
+	}
+}
--- a/vendor/github.com/klauspost/reedsolomon/xor_arm64.s
+++ b/vendor/github.com/klauspost/reedsolomon/xor_arm64.s
@ -0,0 +1,29 @@
+//+build !noasm
+//+build !appengine
+//+build !gccgo
+
+// func xorSliceNEON(in, out []byte)
+TEXT ·xorSliceNEON(SB), 7, $0
+	MOVD in_base+0(FP), R1
+	MOVD in_len+8(FP), R2    // length of message
+	MOVD out_base+24(FP), R5
+	SUBS $32, R2
+	BMI  completeXor
+
+loopXor:
+	// Main loop
+	VLD1.P 32(R1), [V0.B16, V1.B16]
+	VLD1   (R5), [V20.B16, V21.B16]
+
+	VEOR V20.B16, V0.B16, V4.B16
+	VEOR V21.B16, V1.B16, V5.B16
+
+	// Store result
+	VST1.P [V4.D2, V5.D2], 32(R5)
+
+	SUBS $32, R2
+	BPL  loopXor
+
+completeXor:
+	RET
+
--- a/vendor/github.com/klauspost/reedsolomon/xor_noasm.go
+++ b/vendor/github.com/klauspost/reedsolomon/xor_noasm.go
@ -0,0 +1,7 @@
+//go:build noasm || gccgo || appengine || (!amd64 && !arm64)
+
+package reedsolomon
+
+func sliceXor(in, out []byte, o *options) {
+	sliceXorGo(in, out, o)
+}
--- a/vendor/golang.org/x/sys/cpu/asm_aix_ppc64.s
+++ b/vendor/golang.org/x/sys/cpu/asm_aix_ppc64.s
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build gc
-// +build gc

 #include "textflag.h"

--- a/vendor/golang.org/x/sys/cpu/cpu.go
+++ b/vendor/golang.org/x/sys/cpu/cpu.go
@ -38,7 +38,7 @@ var X86 struct {
 	HasAVX512F          bool // Advanced vector extension 512 Foundation Instructions
 	HasAVX512CD         bool // Advanced vector extension 512 Conflict Detection Instructions
 	HasAVX512ER         bool // Advanced vector extension 512 Exponential and Reciprocal Instructions
-	HasAVX512PF         bool // Advanced vector extension 512 Prefetch Instructions Instructions
+	HasAVX512PF         bool // Advanced vector extension 512 Prefetch Instructions
 	HasAVX512VL         bool // Advanced vector extension 512 Vector Length Extensions
 	HasAVX512BW         bool // Advanced vector extension 512 Byte and Word Instructions
 	HasAVX512DQ         bool // Advanced vector extension 512 Doubleword and Quadword Instructions
@ -54,6 +54,9 @@ var X86 struct {
 	HasAVX512VBMI2      bool // Advanced vector extension 512 Vector Byte Manipulation Instructions 2
 	HasAVX512BITALG     bool // Advanced vector extension 512 Bit Algorithms
 	HasAVX512BF16       bool // Advanced vector extension 512 BFloat16 Instructions
+	HasAMXTile          bool // Advanced Matrix Extension Tile instructions
+	HasAMXInt8          bool // Advanced Matrix Extension Int8 instructions
+	HasAMXBF16          bool // Advanced Matrix Extension BFloat16 instructions
 	HasBMI1             bool // Bit manipulation instruction set 1
 	HasBMI2             bool // Bit manipulation instruction set 2
 	HasCX16             bool // Compare and exchange 16 Bytes
--- a/vendor/golang.org/x/sys/cpu/cpu_aix.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_aix.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build aix
-// +build aix

 package cpu

--- a/vendor/golang.org/x/sys/cpu/cpu_arm64.s
+++ b/vendor/golang.org/x/sys/cpu/cpu_arm64.s
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build gc
-// +build gc

 #include "textflag.h"

--- a/vendor/golang.org/x/sys/cpu/cpu_gc_arm64.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_gc_arm64.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build gc
-// +build gc

 package cpu

--- a/vendor/golang.org/x/sys/cpu/cpu_gc_s390x.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_gc_s390x.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build gc
-// +build gc

 package cpu

--- a/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go
@ -3,8 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build (386 || amd64 || amd64p32) && gc
-// +build 386 amd64 amd64p32
-// +build gc

 package cpu

--- a/vendor/golang.org/x/sys/cpu/cpu_gccgo_arm64.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_gccgo_arm64.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build gccgo
-// +build gccgo

 package cpu

--- a/vendor/golang.org/x/sys/cpu/cpu_gccgo_s390x.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_gccgo_s390x.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build gccgo
-// +build gccgo

 package cpu

--- a/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.c
+++ b/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.c
@ -3,8 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build (386 || amd64 || amd64p32) && gccgo
-// +build 386 amd64 amd64p32
-// +build gccgo

 #include <cpuid.h>
 #include <stdint.h>
--- a/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go
@ -3,8 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build (386 || amd64 || amd64p32) && gccgo
-// +build 386 amd64 amd64p32
-// +build gccgo

 package cpu

--- a/vendor/golang.org/x/sys/cpu/cpu_linux.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build !386 && !amd64 && !amd64p32 && !arm64
-// +build !386,!amd64,!amd64p32,!arm64

 package cpu

--- a/vendor/golang.org/x/sys/cpu/cpu_linux_mips64x.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux_mips64x.go
@ -3,8 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build linux && (mips64 || mips64le)
-// +build linux
-// +build mips64 mips64le

 package cpu

--- a/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build linux && !arm && !arm64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !s390x
-// +build linux,!arm,!arm64,!mips64,!mips64le,!ppc64,!ppc64le,!s390x

 package cpu

--- a/vendor/golang.org/x/sys/cpu/cpu_linux_ppc64x.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux_ppc64x.go
@ -3,8 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build linux && (ppc64 || ppc64le)
-// +build linux
-// +build ppc64 ppc64le

 package cpu

--- a/vendor/golang.org/x/sys/cpu/cpu_loong64.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_loong64.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build loong64
-// +build loong64

 package cpu

--- a/vendor/golang.org/x/sys/cpu/cpu_mips64x.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_mips64x.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build mips64 || mips64le
-// +build mips64 mips64le

 package cpu

--- a/vendor/golang.org/x/sys/cpu/cpu_mipsx.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_mipsx.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build mips || mipsle
-// +build mips mipsle

 package cpu

--- a/vendor/golang.org/x/sys/cpu/cpu_other_arm.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_other_arm.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build !linux && arm
-// +build !linux,arm

 package cpu

--- a/vendor/golang.org/x/sys/cpu/cpu_other_arm64.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_other_arm64.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build !linux && !netbsd && !openbsd && arm64
-// +build !linux,!netbsd,!openbsd,arm64

 package cpu

--- a/vendor/golang.org/x/sys/cpu/cpu_other_mips64x.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_other_mips64x.go
@ -3,8 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build !linux && (mips64 || mips64le)
-// +build !linux
-// +build mips64 mips64le

 package cpu

--- a/vendor/golang.org/x/sys/cpu/cpu_other_ppc64x.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_other_ppc64x.go
@ -3,9 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build !aix && !linux && (ppc64 || ppc64le)
-// +build !aix
-// +build !linux
-// +build ppc64 ppc64le

 package cpu

--- a/vendor/golang.org/x/sys/cpu/cpu_other_riscv64.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_other_riscv64.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build !linux && riscv64
-// +build !linux,riscv64

 package cpu

--- a/vendor/golang.org/x/sys/cpu/cpu_ppc64x.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_ppc64x.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build ppc64 || ppc64le
-// +build ppc64 ppc64le

 package cpu

--- a/vendor/golang.org/x/sys/cpu/cpu_riscv64.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_riscv64.go
@ -3,10 +3,9 @@
 // license that can be found in the LICENSE file.

 //go:build riscv64
-// +build riscv64

 package cpu

-const cacheLineSize = 32
+const cacheLineSize = 64

 func initOptions() {}
--- a/vendor/golang.org/x/sys/cpu/cpu_s390x.s
+++ b/vendor/golang.org/x/sys/cpu/cpu_s390x.s
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build gc
-// +build gc

 #include "textflag.h"

--- a/vendor/golang.org/x/sys/cpu/cpu_wasm.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_wasm.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build wasm
-// +build wasm

 package cpu

--- a/vendor/golang.org/x/sys/cpu/cpu_x86.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_x86.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build 386 || amd64 || amd64p32
-// +build 386 amd64 amd64p32

 package cpu

@ -37,6 +36,9 @@ func initOptions() {
 		{Name: "avx512vbmi2", Feature: &X86.HasAVX512VBMI2},
 		{Name: "avx512bitalg", Feature: &X86.HasAVX512BITALG},
 		{Name: "avx512bf16", Feature: &X86.HasAVX512BF16},
+		{Name: "amxtile", Feature: &X86.HasAMXTile},
+		{Name: "amxint8", Feature: &X86.HasAMXInt8},
+		{Name: "amxbf16", Feature: &X86.HasAMXBF16},
 		{Name: "bmi1", Feature: &X86.HasBMI1},
 		{Name: "bmi2", Feature: &X86.HasBMI2},
 		{Name: "cx16", Feature: &X86.HasCX16},
@ -138,6 +140,10 @@ func archInit() {
 		eax71, _, _, _ := cpuid(7, 1)
 		X86.HasAVX512BF16 = isSet(5, eax71)
 	}
+
+	X86.HasAMXTile = isSet(24, edx7)
+	X86.HasAMXInt8 = isSet(25, edx7)
+	X86.HasAMXBF16 = isSet(22, edx7)
 }

 func isSet(bitpos uint, value uint32) bool {
--- a/vendor/golang.org/x/sys/cpu/cpu_x86.s
+++ b/vendor/golang.org/x/sys/cpu/cpu_x86.s
@ -3,8 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build (386 || amd64 || amd64p32) && gc
-// +build 386 amd64 amd64p32
-// +build gc

 #include "textflag.h"

--- a/vendor/golang.org/x/sys/cpu/endian_big.go
+++ b/vendor/golang.org/x/sys/cpu/endian_big.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build armbe || arm64be || m68k || mips || mips64 || mips64p32 || ppc || ppc64 || s390 || s390x || shbe || sparc || sparc64
-// +build armbe arm64be m68k mips mips64 mips64p32 ppc ppc64 s390 s390x shbe sparc sparc64

 package cpu

--- a/vendor/golang.org/x/sys/cpu/endian_little.go
+++ b/vendor/golang.org/x/sys/cpu/endian_little.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build 386 || amd64 || amd64p32 || alpha || arm || arm64 || loong64 || mipsle || mips64le || mips64p32le || nios2 || ppc64le || riscv || riscv64 || sh || wasm
-// +build 386 amd64 amd64p32 alpha arm arm64 loong64 mipsle mips64le mips64p32le nios2 ppc64le riscv riscv64 sh wasm

 package cpu

--- a/vendor/golang.org/x/sys/cpu/hwcap_linux.go
+++ b/vendor/golang.org/x/sys/cpu/hwcap_linux.go
@ -5,7 +5,7 @@
 package cpu

 import (
-	"io/ioutil"
+	"os"
 )

 const (
@ -39,7 +39,7 @@ func readHWCAP() error {
 		return nil
 	}

-	buf, err := ioutil.ReadFile(procAuxv)
+	buf, err := os.ReadFile(procAuxv)
 	if err != nil {
 		// e.g. on android /proc/self/auxv is not accessible, so silently
 		// ignore the error and leave Initialized = false. On some
--- a/vendor/golang.org/x/sys/cpu/proc_cpuinfo_linux.go
+++ b/vendor/golang.org/x/sys/cpu/proc_cpuinfo_linux.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build linux && arm64
-// +build linux,arm64

 package cpu

--- a/vendor/golang.org/x/sys/cpu/runtime_auxv_go121.go
+++ b/vendor/golang.org/x/sys/cpu/runtime_auxv_go121.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build go1.21
-// +build go1.21

 package cpu

--- a/vendor/golang.org/x/sys/cpu/syscall_aix_gccgo.go
+++ b/vendor/golang.org/x/sys/cpu/syscall_aix_gccgo.go
@ -9,7 +9,6 @@
 // gccgo's libgo and thus must not used a CGo method.

 //go:build aix && gccgo
-// +build aix,gccgo

 package cpu

--- a/vendor/golang.org/x/sys/cpu/syscall_aix_ppc64_gc.go
+++ b/vendor/golang.org/x/sys/cpu/syscall_aix_ppc64_gc.go
@ -7,7 +7,6 @@
 // (See golang.org/issue/32102)

 //go:build aix && ppc64 && gc
-// +build aix,ppc64,gc

 package cpu

--- a/vendor/golang.org/x/sys/execabs/execabs_go118.go
+++ b/vendor/golang.org/x/sys/execabs/execabs_go118.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build !go1.19
-// +build !go1.19

 package execabs

--- a/vendor/golang.org/x/sys/execabs/execabs_go119.go
+++ b/vendor/golang.org/x/sys/execabs/execabs_go119.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build go1.19
-// +build go1.19

 package execabs

--- a/vendor/golang.org/x/sys/internal/unsafeheader/unsafeheader.go
+++ b/vendor/golang.org/x/sys/internal/unsafeheader/unsafeheader.go
@ -1,30 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package unsafeheader contains header declarations for the Go runtime's
-// slice and string implementations.
-//
-// This package allows x/sys to use types equivalent to
-// reflect.SliceHeader and reflect.StringHeader without introducing
-// a dependency on the (relatively heavy) "reflect" package.
-package unsafeheader
-
-import (
-	"unsafe"
-)
-
-// Slice is the runtime representation of a slice.
-// It cannot be used safely or portably and its representation may change in a later release.
-type Slice struct {
-	Data unsafe.Pointer
-	Len  int
-	Cap  int
-}
-
-// String is the runtime representation of a string.
-// It cannot be used safely or portably and its representation may change in a later release.
-type String struct {
-	Data unsafe.Pointer
-	Len  int
-}
--- a/vendor/golang.org/x/sys/plan9/pwd_go15_plan9.go
+++ b/vendor/golang.org/x/sys/plan9/pwd_go15_plan9.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build go1.5
-// +build go1.5

 package plan9

--- a/vendor/golang.org/x/sys/plan9/pwd_plan9.go
+++ b/vendor/golang.org/x/sys/plan9/pwd_plan9.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build !go1.5
-// +build !go1.5

 package plan9

--- a/vendor/golang.org/x/sys/plan9/race.go
+++ b/vendor/golang.org/x/sys/plan9/race.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build plan9 && race
-// +build plan9,race

 package plan9

--- a/vendor/golang.org/x/sys/plan9/race0.go
+++ b/vendor/golang.org/x/sys/plan9/race0.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build plan9 && !race
-// +build plan9,!race

 package plan9

--- a/vendor/golang.org/x/sys/plan9/str.go
+++ b/vendor/golang.org/x/sys/plan9/str.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build plan9
-// +build plan9

 package plan9

--- a/vendor/golang.org/x/sys/plan9/syscall.go
+++ b/vendor/golang.org/x/sys/plan9/syscall.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build plan9
-// +build plan9

 // Package plan9 contains an interface to the low-level operating system
 // primitives. OS details vary depending on the underlying system, and
--- a/vendor/golang.org/x/sys/plan9/zsyscall_plan9_386.go
+++ b/vendor/golang.org/x/sys/plan9/zsyscall_plan9_386.go
@ -2,7 +2,6 @@
 // Code generated by the command above; see README.md. DO NOT EDIT.

 //go:build plan9 && 386
-// +build plan9,386

 package plan9

--- a/vendor/golang.org/x/sys/plan9/zsyscall_plan9_amd64.go
+++ b/vendor/golang.org/x/sys/plan9/zsyscall_plan9_amd64.go
@ -2,7 +2,6 @@
 // Code generated by the command above; see README.md. DO NOT EDIT.

 //go:build plan9 && amd64
-// +build plan9,amd64

 package plan9

--- a/vendor/golang.org/x/sys/plan9/zsyscall_plan9_arm.go
+++ b/vendor/golang.org/x/sys/plan9/zsyscall_plan9_arm.go
@ -2,7 +2,6 @@
 // Code generated by the command above; see README.md. DO NOT EDIT.

 //go:build plan9 && arm
-// +build plan9,arm

 package plan9

--- a/vendor/golang.org/x/sys/unix/aliases.go
+++ b/vendor/golang.org/x/sys/unix/aliases.go
@ -2,9 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-//go:build (aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris || zos) && go1.9
-// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris zos
-// +build go1.9
+//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris || zos

 package unix

--- a/vendor/golang.org/x/sys/unix/asm_aix_ppc64.s
+++ b/vendor/golang.org/x/sys/unix/asm_aix_ppc64.s
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build gc
-// +build gc

 #include "textflag.h"

--- a/vendor/golang.org/x/sys/unix/asm_bsd_386.s
+++ b/vendor/golang.org/x/sys/unix/asm_bsd_386.s
@ -3,8 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build (freebsd || netbsd || openbsd) && gc
-// +build freebsd netbsd openbsd
-// +build gc

 #include "textflag.h"

--- a/vendor/golang.org/x/sys/unix/asm_bsd_amd64.s
+++ b/vendor/golang.org/x/sys/unix/asm_bsd_amd64.s
@ -3,8 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build (darwin || dragonfly || freebsd || netbsd || openbsd) && gc
-// +build darwin dragonfly freebsd netbsd openbsd
-// +build gc

 #include "textflag.h"

--- a/vendor/golang.org/x/sys/unix/asm_bsd_arm.s
+++ b/vendor/golang.org/x/sys/unix/asm_bsd_arm.s
@ -3,8 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build (freebsd || netbsd || openbsd) && gc
-// +build freebsd netbsd openbsd
-// +build gc

 #include "textflag.h"

--- a/vendor/golang.org/x/sys/unix/asm_bsd_arm64.s
+++ b/vendor/golang.org/x/sys/unix/asm_bsd_arm64.s
@ -3,8 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build (darwin || freebsd || netbsd || openbsd) && gc
-// +build darwin freebsd netbsd openbsd
-// +build gc

 #include "textflag.h"

--- a/vendor/golang.org/x/sys/unix/asm_bsd_ppc64.s
+++ b/vendor/golang.org/x/sys/unix/asm_bsd_ppc64.s
@ -3,8 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build (darwin || freebsd || netbsd || openbsd) && gc
-// +build darwin freebsd netbsd openbsd
-// +build gc

 #include "textflag.h"

--- a/vendor/golang.org/x/sys/unix/asm_bsd_riscv64.s
+++ b/vendor/golang.org/x/sys/unix/asm_bsd_riscv64.s
@ -3,8 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build (darwin || freebsd || netbsd || openbsd) && gc
-// +build darwin freebsd netbsd openbsd
-// +build gc

 #include "textflag.h"

--- a/vendor/golang.org/x/sys/unix/asm_linux_386.s
+++ b/vendor/golang.org/x/sys/unix/asm_linux_386.s
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build gc
-// +build gc

 #include "textflag.h"

--- a/vendor/golang.org/x/sys/unix/asm_linux_amd64.s
+++ b/vendor/golang.org/x/sys/unix/asm_linux_amd64.s
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build gc
-// +build gc

 #include "textflag.h"

--- a/vendor/golang.org/x/sys/unix/asm_linux_arm.s
+++ b/vendor/golang.org/x/sys/unix/asm_linux_arm.s
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build gc
-// +build gc

 #include "textflag.h"

--- a/vendor/golang.org/x/sys/unix/asm_linux_arm64.s
+++ b/vendor/golang.org/x/sys/unix/asm_linux_arm64.s
@ -3,9 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build linux && arm64 && gc
-// +build linux
-// +build arm64
-// +build gc

 #include "textflag.h"

--- a/vendor/golang.org/x/sys/unix/asm_linux_loong64.s
+++ b/vendor/golang.org/x/sys/unix/asm_linux_loong64.s
@ -3,9 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build linux && loong64 && gc
-// +build linux
-// +build loong64
-// +build gc

 #include "textflag.h"

--- a/vendor/golang.org/x/sys/unix/asm_linux_mips64x.s
+++ b/vendor/golang.org/x/sys/unix/asm_linux_mips64x.s
@ -3,9 +3,6 @@
 // license that can be found in the LICENSE file.

 //go:build linux && (mips64 || mips64le) && gc
-// +build linux
-// +build mips64 mips64le
-// +build gc

 #include "textflag.h"

--- a/Show More
+++ b/Show More