Added commitments support to the tree, excluding the actual math; that'll be added later.

This commit is contained in:
Daniel Lamberger 2023-09-20 13:12:39 +03:00
parent 3d65d6e8e3
commit 9eb5c9e93c
8 changed files with 334 additions and 47 deletions

4
.vscode/settings.json vendored Normal file
View File

@ -0,0 +1,4 @@
{
"editor.detectIndentation": false,
"editor.insertSpaces": true
}

5
.vscode/tasks.json vendored
View File

@ -6,10 +6,13 @@
{
"label": "Build Nim Project",
"type": "shell",
"command": "nim --out:bin/main -d:debug --opt:none --hints:off --debugger:native c -r main testOnSave",
"command": "nim --out:bin/main -d:debug --opt:none --hints:off --debugger:native c -r tests/test_all testOnSave",
"group": {
"kind": "build",
"isDefault": true
},
"presentation": {
"clear": true // clear terminal
}
}
]

View File

@ -8,7 +8,8 @@ license = "Apache License 2.0"
skipDirs = @["tests"]
requires "nim >= 1.6.0",
"unittest2"
"unittest2",
"elvis"
let nimc = getEnv("NIMC", "nim") # Which nim compiler to use
let lang = getEnv("NIMLANG", "c") # Which backend (c/cpp/js)

View File

@ -0,0 +1,186 @@
# Nimbus
# Copyright (c) 2021-2023 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
## This module provides methods to generate commitments for tree nodes
import
std/[tables, sequtils],
elvis,
./tree
{.push warning[DotLikeOps]: off.}
# Placeholder identity commitment: a default-constructed `Point`. It is the
# fallback value used by `snapshotChildCommitment` for an empty child slot.
# Todo: Initialize to montgomery X=0, Y=1, Z=1
const IdentityPoint = Point()
# Todo: implement; this is a mock
proc BanderwagonMultiMapToScalarField(fields: var openArray[Field], points: openArray[Point]) =
  ## Mock mapping of points to scalar fields: copies the `X` coordinate of
  ## `points[i]` into `fields[i]` for every point.
  ##
  ## `fields` is indexed with the positions of `points`, so it must be at least
  ## as long as `points`.
  for position, point in points:
    fields[position] = point.X
# Todo: implement; this is a mock
proc BanderwagonAddPoint(dst: var Point, src: Point) =
  ## Mock point addition: adds the first limb of `src.X` to the first limb of
  ## `dst.X`, in place. Every other limb and coordinate of `dst` is untouched.
  dst.X[0] = dst.X[0] + src.X[0]
# Todo: implement; this is a mock
proc BandesnatchSubtract(x, y: Field): Field =
  ## Mock field subtraction: the first limb of the result is `x[0] - y[0]`;
  ## the remaining three limbs are zero.
  result[0] = x[0] - y[0] # `result` starts zero-initialized
# Todo: implement; this is a mock
proc IpaCommitToPoly(poly: array[256, Field]): Point =
  ## Mock polynomial commitment: returns a `Point` whose `X[0]` is the sum of
  ## the first limb of all 256 entries in `poly`. Every other limb of `X`, and
  ## all of `Y` and `Z`, are zero.
  for coefficient in poly:
    result.X[0] += coefficient[0] # `result` starts zero-initialized
# Todo: implement
proc initializeCommitment*(vn: ValuesNode) =
  ## Hook invoked for a freshly created `ValuesNode`. It is currently a no-op:
  ## the node keeps its default-initialized commitment.
  discard
# Todo: implement; this is a mock; we set the commitment's X[0] (uint64) to be
# the sum of the last byte of all stored values.
proc updateCommitment*(vn: ValuesNode, index: byte, newValue: ref Bytes32) =
  ## Adjusts `vn.commitment` for the value at `index` being replaced by
  ## `newValue` (nil meaning "no value").
  ##
  ## Must be called *before* `vn.values[index]` is overwritten, because the
  ## value currently stored there is read to remove its contribution.
  let previous = vn.values[index]
  if not previous.isNil:
    # Take out the contribution of the value being replaced
    vn.commitment.X[0] -= previous[^1]
  if not newValue.isNil:
    # Add the contribution of the incoming value
    vn.commitment.X[0] += newValue[^1]
proc snapshotChildCommitment*(node: BranchesNode, childIndex: byte) =
  ## Records the commitment that the child at `childIndex` has right now in
  ## `node.commitmentsSnapshot`, creating that table on first use. An empty
  ## (nil) child slot is recorded as `IdentityPoint`.
  ##
  ## Only the first snapshot per child index is kept; later calls for the same
  ## index leave the stored value unchanged. The recorded value is what
  ## `updateAllCommitments` later compares against the child's updated
  ## commitment in order to derive the delta applied to `node`'s commitment.
  if node.commitmentsSnapshot.isNil:
    node.commitmentsSnapshot = new Table[byte, Point]
  if childIndex notin node.commitmentsSnapshot:
    node.commitmentsSnapshot[childIndex] =
      node.branches[childIndex].?commitment ?: IdentityPoint
proc updateAllCommitments*(tree: BranchesNode) =
  ## Updates the commitments of all modified nodes in the tree, bottom-up.
  ##
  ## A branch node counts as modified when its `commitmentsSnapshot` table is
  ## non-nil. For each such node, the delta between every snapshotted child
  ## commitment and that child's current commitment is computed, the deltas
  ## are committed to as a 256-entry polynomial, and the result is added to
  ## the node's own commitment. The node's snapshot table is then cleared.
  ##
  ## Does nothing when the root carries no snapshot.
  if tree.commitmentsSnapshot == nil:
    return

  # Bucket the modified branch nodes by depth. The root sits at depth 0; its
  # descendants are reported by `enumerateModifiedTree` starting at depth 1.
  var levels: array[31, seq[BranchesNode]]
  levels[0].add(tree)
  for node, depth, _ in tree.enumerateModifiedTree():
    if node of BranchesNode and node.BranchesNode.commitmentsSnapshot != nil:
      levels[depth].add(node.BranchesNode)

  # Deepest level first, so children are up to date before their parents
  for depth in countdown(30, 0):
    let nodes = levels[depth]
    if nodes.len == 0:
      continue

    # Lay out the points as consecutive (snapshot, current) pairs, one pair per
    # modified child, so the whole level is mapped to fields in a single call.
    var points: seq[Point]
    var childIndexes: seq[byte]
    for node in nodes:
      for index, commitment in node.commitmentsSnapshot:
        let child = node.branches[index]
        # A child slot may be empty; treat it like `snapshotChildCommitment`
        # does, i.e. as the identity commitment, instead of dereferencing nil.
        let current =
          if child != nil: child.commitment
          else: IdentityPoint
        points.add(commitment)
        points.add(current)
        childIndexes.add(index)

    var frs = newSeq[Field](points.len)
    BanderwagonMultiMapToScalarField(frs, points)

    # delta = current - snapshot, for each pair
    var deltas = newSeqOfCap[Field](childIndexes.len)
    for i in countup(0, frs.len - 2, 2):
      deltas.add(BandesnatchSubtract(frs[i + 1], frs[i]))

    # `deltas` and `childIndexes` are parallel, and were filled in the same
    # node order as the loop below, so a single cursor walks both.
    var cursor = 0
    for node in nodes:
      var poly: array[256, Field]
      for _ in 0 ..< node.commitmentsSnapshot.len:
        poly[childIndexes[cursor]] = deltas[cursor]
        inc(cursor)
      node.commitmentsSnapshot = nil # this node is no longer marked modified
      node.commitment.BanderwagonAddPoint(IpaCommitToPoly(poly))
#[
There are several possible approaches on how to track mutations to the trie and
update commitments. They have tradeoffs in performance, RAM usage and software
complexity.
One approach, used in the Geth client, is to have tree nodes store a mapping
between the index of a child node and its commitment before modifications. This
allows traversing the modified portion of the tree by starting from the root
node and going down any node that has a non-empty mapping. Let's call this the
"in-tree" approach.
To update commitments, 31 lists are allocated (the maximum depth of the tree
minus one), the modified nodes are traversed, and appended to the list matching
their depth. Then, all modified nodes at the lowest layer of the tree have their
commitments updated, using bulk operations where possible. Then, the layer above
is updated: each of its nodes uses the cached commitments of its modified children
to compute the delta between their previous and current commitments, and updates
its own commitment accordingly. This process goes on up the tree layers till the root is
reached. The mappings with old commitment values are cleared along the way.
A similar approach could be to store the modified node commitments outside of
the tree in a separate tree. A kind of copy-on-write snapshot.
Another possible approach is to have a list of modifications done to the tree.
Each entry could hold a reference to the respective node. The list can be stored
separately from the tree. To update commitments, we could sort the list
according to the depth at which a modification was made (deepest first), then
take ranges in the list (per depth) and bulk-update commitments, while storing
the original commitments aside for the next iteration. Let's call this the
"modifications list" approach. It has some advantages and disadvantages.
We chose to use the "in-tree" approach. However, it's worth reevaluating this
later on when the implementation matures a bit. Here's a brain dump of the pros
and cons.
Approach: In-tree Mods list
Performance:
Ram increase for unmodified tree map ptr parent ptr, depth, index
Ram increase for modified tree map per node; fragmented. low; entry in list
commitment per node
Change tracking performance map nil test, allocation, add entry to list
lookup, copy commitment
Commitments update performance Alloc 32 arrays & layout sort list; O(n*log(n))
nodes by depth; O(n) populate temp list per depth
Commitments update pipeline Leaves: immediately Leaves: later
Branches: later Branches: later
Change management:
Tracking multiple updates at same location No Yes
Merging multiple updates at same location Yes; map lookup No; need to de-dup
Undo changes to tree in case of chain reorg Impossible Run changes list in reverse
Untracking non-change (set same value) Hard Easy; not appending to the list
Serialization & logging Only final state; All changes; stand-alone
tied to tree
Maintenance:
Implementation complexity Low High
Separation of tree operations vs commitments Medium Good
Future:
Flexibility in bulk operations Good; all modified Medium; handling one depth
(incl. database writes) state is in arrays layer at a time
Flexibility for future optimizations Good; instant access Medium; handling one depth
to all modified state layer at a time
Parent-child relationship during comms update Weak Strong
]#

View File

@ -10,7 +10,8 @@
import
std/[sequtils, sugar],
".."/[utils, config],
./tree
./tree,
./commitment
when TraceLogs: import std/strformat
@ -18,9 +19,16 @@ proc setValue(node: ValuesNode, index: byte, value: Bytes32) =
## Heap-allocates the given `value` and stores it at the given `index`
var heapValue = new Bytes32
heapValue[] = value
node.updateCommitment(index, heapValue)
node.values[index] = heapValue
proc deleteValue(node: ValuesNode, index: byte) =
## Deletes the value at the given `index`, if any
# The commitment is updated first on purpose: `updateCommitment` reads the
# value still stored at `index` in order to remove its contribution.
node.updateCommitment(index, nil)
node.values[index] = nil
# TODO: prevent setting a value from a non-root node
proc setValue*(node: BranchesNode, key: Bytes32, value: Bytes32) =
## Stores the given `value` in the tree at the given `key`
@ -31,6 +39,7 @@ proc setValue*(node: BranchesNode, key: Bytes32, value: Bytes32) =
# Walk down the tree till the branch closest to the key
while current.branches[key[depth]] of BranchesNode:
when TraceLogs: echo &"At node {cast[uint64](current)}. Going down to branch '{key[depth].toHex}' at depth {depth}"
current.snapshotChildCommitment(key[depth])
current = current.branches[key[depth]].BranchesNode
inc(depth)
@ -42,26 +51,32 @@ proc setValue*(node: BranchesNode, key: Bytes32, value: Bytes32) =
# If the stem differs from the key, we can't use that ValuesNode. We need to
# insert intermediate branches till the point they diverge, pushing down the
# current ValuesNode, and the proceed to create a new ValuesNode
# current ValuesNode, and then proceed to create a new ValuesNode
# Todo: zip makes a memory allocation. avoid.
var divergence = vn.stem.zip(key).firstMatchAt(tup => tup[0] != tup[1])
if divergence.found:
when TraceLogs: echo &" Key: {key.toHex}"
when TraceLogs: echo &" Found difference at depth {divergence.index}"
when TraceLogs: echo &" Found difference at depth {divergence.index}; inserting intermediate branches"
while depth < divergence.index:
let newBranch = new BranchesNode
current.snapshotChildCommitment(key[depth])
current.branches[key[depth]] = newBranch
when TraceLogs: echo &"At node {cast[uint64](current)}. Replaced ValuesNode with a new branch at '{key[depth].toHex}', depth {depth}, new branch addr {cast[uint64](newBranch)}"
when TraceLogs: echo &"At node {cast[uint64](current)}. Assigned new branch at '{key[depth].toHex}', depth {depth}, addr {cast[uint64](newBranch)}"
current = newBranch
inc(depth)
current.branches[vn.stem[depth]] = vn
when TraceLogs: echo &"At node {cast[uint64](current)}. Assigned ValuesNode to new branch at '{vn.stem[depth].toHex}', depth {depth}, ValuesNodes addr {cast[uint64](vn)}"
current.snapshotChildCommitment(vn.stem[depth])
current.branches[vn.stem[depth]] = vn
when TraceLogs: echo &"At node {cast[uint64](current)}. Assigned ValuesNode at '{vn.stem[depth].toHex}', depth {depth}, addr {cast[uint64](vn)}"
vn = nil # We can't use it
current.snapshotChildCommitment(key[depth])
# The current branch does not contain a ValuesNode at the required offset;
# create one
if vn == nil:
vn = new ValuesNode
vn.stem[0..<31] = key[0..<31]
vn.initializeCommitment()
current.branches[key[depth]] = vn
when TraceLogs: echo &"Created ValuesNode at depth {depth}, branch '{key[depth].toHex}', stem {vn.stem.toHex}"
@ -70,6 +85,7 @@ proc setValue*(node: BranchesNode, key: Bytes32, value: Bytes32) =
when TraceLogs: echo &"Added value to slot '{key[^1].toHex}'"
proc deleteValue*(node: BranchesNode, key: Bytes32): bool =
## Deletes the value associated with the given `key` from the tree.
var current = node
@ -79,6 +95,7 @@ proc deleteValue*(node: BranchesNode, key: Bytes32): bool =
# Walk down the tree until the branch closest to the key
while current.branches[key[depth]] of BranchesNode:
when TraceLogs: echo &"At node {cast[uint64](current)}. Going down to branch '{key[depth].toHex}' at depth {depth}"
current.snapshotChildCommitment(key[depth])
current = current.branches[key[depth]].BranchesNode
inc(depth)
@ -96,7 +113,9 @@ proc deleteValue*(node: BranchesNode, key: Bytes32): bool =
# If the stem matches the key, we found the ValuesNode for the key.
# We remove it by setting the branch to nil.
current.branches[key[depth]].ValuesNode.values[key[^1]] = nil
current.snapshotChildCommitment(key[depth])
vn.deleteValue(key[^1])
return true
# If no ValuesNode was found for the key, it means the value doesn't exist.

View File

@ -9,26 +9,35 @@
## pretty printing and serialization methods
import
std/streams,
std/[streams, tables, strformat],
../utils
# TODO: make sizes configurable
type
Bytes32* = array[32, byte]
## A 32-bytes blob that can represent a verkle key or value
Field* = array[4, uint64]
## Four 64-bit limbs; the type of each coordinate of a `Point`
Point* = object
## A point made of three `Field` coordinates; used as the commitment type
X*, Y*, Z*: Field
Node* = ref object of RootObj
## Base node type
commitment*: Point
## The commitment of this node
BranchesNode* = ref object of Node
## Internal node in the tree that holds references to 256 child nodes (or nil-s)
branches*: array[256, Node]
commitmentsSnapshot*: ref Table[byte, Point]
## Child index --> commitment recorded for that child by
## `snapshotChildCommitment`. Nil until the first snapshot is stored, and
## reset to nil by `updateAllCommitments`.
ValuesNode* = ref object of Node
## Leaf node in the tree that holds references to 256 values (or nil-s)
stem*: array[31, byte]
values*: array[256, ref Bytes32]
c1*, c2*: Point
## NOTE(review): not referenced by the code visible here; presumably
## partial commitments over the values -- confirm
iterator enumerateTree*(node: BranchesNode):
@ -56,7 +65,7 @@ iterator enumerateTree*(node: BranchesNode):
inc(last.index)
if child != nil:
# If the child node is non-empty, return it
yield (node: child, depth: stack.len.uint8, index: last.index.uint8 - 1)
yield (node: child, depth: stack.len.uint8, index: (last.index-1).uint8)
# If the child is a BranchesNode, we push it to the stack and start
# iterating its own children next iteration (starting from index 0)
@ -64,6 +73,21 @@ iterator enumerateTree*(node: BranchesNode):
stack.add((child.BranchesNode, 0))
iterator enumerateModifiedTree*(node: BranchesNode, depth: uint8 = 1):
    tuple[node: Node, depth: uint8, index: uint8] {.closure.} =
  ## Walks the modified part of the tree below `node`, parent before children.
  ##
  ## For every child index recorded in `node.commitmentsSnapshot`, yields the
  ## child currently stored at that index (which may be nil), together with
  ## `depth` and the index. When that child is a `BranchesNode`, its own
  ## modified children follow, reported at `depth + 1`. Yields nothing for a
  ## node without a snapshot table.
  if not node.commitmentsSnapshot.isNil:
    for childIndex in node.commitmentsSnapshot.keys:
      let childNode = node.branches[childIndex]
      yield (childNode, depth, childIndex)
      if childNode of BranchesNode:
        let subtree = childNode.BranchesNode
        for descendant in enumerateModifiedTree(subtree, depth + 1):
          yield descendant
iterator enumerateValues*(node: BranchesNode):
tuple[key: Bytes32, value: ref Bytes32] =
## Iterates over all the key-value pairs in the tree
@ -84,7 +108,9 @@ iterator enumerateValues*(node: BranchesNode):
proc printTreeValues*(node: BranchesNode, stream: Stream) =
## Writes all the key-value pairs into the given `stream`, in the form:
##
## (hex key) --> (hex value)
##
## (hex key) --> (hex value)
for key, value in node.enumerateValues():
stream.writeAsHex(key)
@ -95,7 +121,9 @@ proc printTreeValues*(node: BranchesNode, stream: Stream) =
proc `$`*(node: BranchesNode): string =
## Returns all the key-value pairs in the tree in the form:
##
## (hex key) --> (hex value)
##
## (hex key) --> (hex value)
var stream = newStringStream()
printTreeValues(node, stream)
@ -105,20 +133,26 @@ proc `$`*(node: BranchesNode): string =
proc printTree*(node: BranchesNode, stream: Stream) =
## Writes all the nodes and values into the given `stream`.
## Outputs a line for each branch and value in the tree, indented by depth.
stream.writeLine("<Tree root>")
## Outputs a line for each branch, stem and value in the tree, indented by
## depth, along with their commitment.
stream.writeLine(&"<Tree root> Branch. Commitment: {node.commitment.X[0].byte.toHex}")
for n, depth, parentIndex in node.enumerateTree():
for _ in 0 ..< depth.int:
stream.write(" ")
stream.writeAsHex(parentIndex.byte)
stream.writeLine()
if (n of ValuesNode):
if n of BranchesNode:
for _ in depth.int .. 68:
stream.write(" ")
stream.writeLine(&"Branch. Commitment: {n.commitment.X[0].byte.toHex}")
elif n of ValuesNode:
stream.writeAsHex(n.ValuesNode.stem[depth..^1])
for _ in 0 .. 37:
stream.write(" ")
stream.writeLine(&"Leaves. Commitment: {n.commitment.X[0].byte.toHex}")
for valueIndex, value in n.ValuesNode.values.pairs:
if value != nil:
for _ in 0 .. depth.int:
stream.write(" ")
stream.writeAsHex(n.ValuesNode.stem[depth..^1])
stream.write(" ")
stream.writeAsHex(valueIndex.byte)
stream.write(" --> ")
stream.writeAsHex(value[])
stream.writeLine()
stream.writeLine(" Leaf.")

View File

@ -29,7 +29,7 @@ proc bitsToHex*(b: byte): char =
of 13: result = 'd'
of 14: result = 'e'
of 15: result = 'f'
else: raise newException(Defect, "")
else: raise newException(ValueError, "Given byte must be uint4 (0-15)")
proc hexToBits*(c: char): byte =
@ -51,7 +51,7 @@ proc hexToBits*(c: char): byte =
of 'd', 'D': result = 13
of 'e', 'E': result = 14
of 'f', 'F': result = 15
else: raise newException(Defect, "")
else: raise newException(ValueError, "Character must be hexadecimal (a-f | A-F | 0-9)")
proc writeAsHex*(stream: Stream, b: byte) =

View File

@ -8,10 +8,10 @@
## The main module. Provides some tests.
import
std/[random, streams, os],
std/[random, streams, os, sequtils],
unittest2,
../eth_verkle/utils,
../eth_verkle/tree/[tree, operations]
../eth_verkle/tree/[tree, operations, commitment]
createDir "testResults"
@ -25,31 +25,68 @@ suite "main":
result[16 ..< 24] = cast[array[8, byte]](random.next())
result[24 ..< 32] = cast[array[8, byte]](random.next())
proc toBlob32(str: string): Bytes32 =
result[0..^1] = str.fromHex
iterator hexKvpsToBlob32(kvps: openArray[tuple[key: string, value: string]]):
tuple[key: Bytes32, value: Bytes32] =
for (hexKey, hexValue) in kvps:
yield (hexKey.toBlob32, hexValue.toBlob32)
let sampleKvps = @[
("0000000000000000000000000000000000000000000000000000000000000000", "0000000000000000000000000000000000000000000000000000000000000000"),
("1100000000000000000000000000000000000000000000000000000000000000", "0000000000000000000000000000000000000000000000000000000000000000"),
("2200000000000000000000000000000000000000000000000000000000000000", "0000000000000000000000000000000000000000000000000000000000000000"),
("2211000000000000000000000000000000000000000000000000000000000000", "0000000000000000000000000000000000000000000000000000000000000000"),
("3300000000000000000000000000000000000000000000000000000000000000", "0000000000000000000000000000000000000000000000000000000000000000"),
("3300000000000000000000000000000000000000000000000000000000000001", "0000000000000000000000000000000000000000000000000000000000000000"),
("33000000000000000000000000000000000000000000000000000000000000ff", "0000000000000000000000000000000000000000000000000000000000000000"),
("4400000000000000000000000000000000000000000000000000000000000000", "0000000000000000000000000000000000000000000000000000000000000000"),
("4400000011000000000000000000000000000000000000000000000000000000", "0000000000000000000000000000000000000000000000000000000000000000"),
("5500000000000000000000000000000000000000000000000000000000000000", "0000000000000000000000000000000000000000000000000000000000000000"),
("5500000000000000000000000000000000000000000000000000000000001100", "0000000000000000000000000000000000000000000000000000000000000000"),
("0000000000000000000000000000000000000000000000000000000000000000", "0000000000000000000000000000000000000000000000000000000000000001"),
("000102030405060708090a0b0c0d0e0f000102030405060708090a0b0c0d0e0f", "0000000000000000000000000000000000000000000000000000000000000002"),
("1100000000000000000000000000000000000000000000000000000000000000", "0000000000000000000000000000000000000000000000000000000000000003"),
("2200000000000000000000000000000000000000000000000000000000000000", "0000000000000000000000000000000000000000000000000000000000000004"),
("2211000000000000000000000000000000000000000000000000000000000000", "0000000000000000000000000000000000000000000000000000000000000005"),
("3300000000000000000000000000000000000000000000000000000000000000", "0000000000000000000000000000000000000000000000000000000000000006"),
("3300000000000000000000000000000000000000000000000000000000000001", "0000000000000000000000000000000000000000000000000000000000000007"),
("33000000000000000000000000000000000000000000000000000000000000ff", "0000000000000000000000000000000000000000000000000000000000000008"),
("4400000000000000000000000000000000000000000000000000000000000000", "0000000000000000000000000000000000000000000000000000000000000009"),
("4400000011000000000000000000000000000000000000000000000000000000", "000000000000000000000000000000000000000000000000000000000000000a"),
("5500000000000000000000000000000000000000000000000000000000000000", "000000000000000000000000000000000000000000000000000000000000000b"),
("5500000000000000000000000000000000000000000000000000000000001100", "000000000000000000000000000000000000000000000000000000000000000c"),
]
let updateKvps = @[
("0000000000000000000000000000000000000000000000000000000000000000", "0000000000000000000000000000000000000000000000000000000000000011"),
("1100000000000000000000000000000000000000000000000000000000010000", "0000000000000000000000000000000000000000000000000000000000000012"),
("4400000011000000000000000000000000000000000000000000000000000001", "0000000000000000000000000000000000000000000000000000000000000013"),
]
let deleteKvps = @[
"1100000000000000000000000000000000000000000000000000000000010000",
"2211000000000000000000000000000000000000000000000000000000000000",
"5500000000000000000000000000000000000000000000000000000000001100"
]
test "testOnSave":
## Prints a small sample tree
var tree = new BranchesNode
for (keyHex, valueHex) in sampleKvps:
var key, value: Bytes32
key[0..^1] = keyHex.fromHex
value[0..^1] = valueHex.fromHex
tree.setValue(key, value)
proc printAndTestCommitments(tree: BranchesNode) =
  ## Brings all commitments up to date, prints the full tree to stdout, and
  ## checks the root's mock commitment: its `X[0]` limb is expected to equal
  ## the sum of the last byte of every value stored in the tree.
  tree.updateAllCommitments()
  #echo $tree # print keys --> values
  tree.printTree(newFileStream(stdout)) # prints full tree
  # Accumulate in uint64, the type of the commitment limb. A `byte`
  # accumulator wraps at 256 and would make the check fail spuriously on
  # trees whose last bytes sum to 256 or more.
  let expectedCommitment =
    tree.enumerateValues.toSeq.foldl(a + b.value[^1].uint64, 0'u64)
  check tree.commitment.X[0] == expectedCommitment
test "testOnSave":
echo "Populating tree...\n"
var tree = new BranchesNode
for (key, value) in sampleKvps.hexKvpsToBlob32():
tree.setValue(key, value)
tree.printAndTestCommitments()
echo "\n\nUpdating tree...\n\n"
for (key, value) in updateKvps.hexKvpsToBlob32():
tree.setValue(key, value)
tree.printAndTestCommitments()
echo "\n\nDeleting nodes:"
echo deleteKvps.foldl(a & " " & b & "\n", "")
for key in deleteKvps:
discard tree.deleteValue(key.toBlob32)
tree.printAndTestCommitments()
test "testDelValues":
## Makes a small sample tree
@ -62,11 +99,11 @@ suite "main":
## Deletes some values
key[0..^1] = sampleKvps[6][0].fromHex
doAssert tree.deleteValue(key) == true
check tree.deleteValue(key) == true
key[0..^1] = sampleKvps[7][0].fromHex
doAssert tree.deleteValue(key) == true
check tree.deleteValue(key) == true
key[0..^1] = sampleKvps[8][0].fromHex
doAssert tree.deleteValue(key) == true
check tree.deleteValue(key) == true
tree.printTree(newFileStream(stdout)) # prints full tree
test "testDelNonExistingValues":
@ -80,12 +117,15 @@ suite "main":
tree.setValue(key1, value)
tree.setValue(key2, value)
doAssert tree.deleteValue(key3) == false
check tree.deleteValue(key3) == false
test "randomValues_10000":
## Writes a larger tree with random nodes to a file
var tree = new BranchesNode
for i in 0..10000:
tree.setValue(key = makeRandomBlob32(), value = makeRandomBlob32())
tree.printTree(newFileStream(open("testResults/randomValues_10000", fmWrite)))
tree.updateAllCommitments()
var file = open("testResults/randomValues_10000", fmWrite)
defer: close(file)
tree.printTree(newFileStream(file))
echo "Tree dumped to 'testResults/randomValues_10000'"