From 0b361cce1ea3f7da006893b4d4f8b179cd83ee96 Mon Sep 17 00:00:00 2001 From: Jay Geng Date: Fri, 25 Aug 2023 14:38:30 -0400 Subject: [PATCH] Update the soroban metering cap (#1376) * Update the soroban metering cap * fixup! Update the soroban metering cap * Update core/cap-0046-10.md Co-authored-by: Siddharth Suresh * Update core/cap-0046-10.md Co-authored-by: Siddharth Suresh * Update core/cap-0046-10.md Co-authored-by: Siddharth Suresh * Update core/cap-0046-10.md Co-authored-by: Siddharth Suresh * Update core/cap-0046-10.md Co-authored-by: Siddharth Suresh * Update core/cap-0046-10.md Co-authored-by: Siddharth Suresh * Update core/cap-0046-10.md Co-authored-by: Siddharth Suresh --------- Co-authored-by: Graydon Hoare Co-authored-by: Siddharth Suresh --- .../Stellar-contract-config-setting.x | 247 ++++++++++++++++++ core/cap-0046-10.md | 141 ++++------ 2 files changed, 295 insertions(+), 93 deletions(-) create mode 100644 contents/cap-0046/Stellar-contract-config-setting.x diff --git a/contents/cap-0046/Stellar-contract-config-setting.x b/contents/cap-0046/Stellar-contract-config-setting.x new file mode 100644 index 000000000..bb76b3c63 --- /dev/null +++ b/contents/cap-0046/Stellar-contract-config-setting.x @@ -0,0 +1,247 @@ +%#include "xdr/Stellar-types.h" + +namespace stellar { +// General “Soroban execution lane” settings +struct ConfigSettingContractExecutionLanesV0 +{ + // maximum number of Soroban transactions per ledger + uint32 ledgerMaxTxCount; +}; + +// "Compute" settings for contracts (instructions and memory). +struct ConfigSettingContractComputeV0 +{ + // Maximum instructions per ledger + int64 ledgerMaxInstructions; + // Maximum instructions per transaction + int64 txMaxInstructions; + // Cost of 10000 instructions + int64 feeRatePerInstructionsIncrement; + + // Memory limit per transaction. Unlike instructions, there is no fee + // for memory, just the limit. + uint32 txMemoryLimit; +}; + +// Ledger access settings for contracts. +struct ConfigSettingContractLedgerCostV0 +{ + // Maximum number of ledger entry read operations per ledger + uint32 ledgerMaxReadLedgerEntries; + // Maximum number of bytes that can be read per ledger + uint32 ledgerMaxReadBytes; + // Maximum number of ledger entry write operations per ledger + uint32 ledgerMaxWriteLedgerEntries; + // Maximum number of bytes that can be written per ledger + uint32 ledgerMaxWriteBytes; + + // Maximum number of ledger entry read operations per transaction + uint32 txMaxReadLedgerEntries; + // Maximum number of bytes that can be read per transaction + uint32 txMaxReadBytes; + // Maximum number of ledger entry write operations per transaction + uint32 txMaxWriteLedgerEntries; + // Maximum number of bytes that can be written per transaction + uint32 txMaxWriteBytes; + + int64 feeReadLedgerEntry; // Fee per ledger entry read + int64 feeWriteLedgerEntry; // Fee per ledger entry write + + int64 feeRead1KB; // Fee for reading 1KB + + // The following parameters determine the write fee per 1KB. + // Write fee grows linearly until bucket list reaches this size + int64 bucketListTargetSizeBytes; + // Fee per 1KB write when the bucket list is empty + int64 writeFee1KBBucketListLow; + // Fee per 1KB write when the bucket list has reached `bucketListTargetSizeBytes` + int64 writeFee1KBBucketListHigh; + // Write fee multiplier for any additional data past the first `bucketListTargetSizeBytes` + uint32 bucketListWriteFeeGrowthFactor; +}; + +// Historical data (pushed to core archives) settings for contracts. 
+struct ConfigSettingContractHistoricalDataV0
+{
+    int64 feeHistorical1KB; // Fee for storing 1KB in archives
+};
+
+// Contract event-related settings.
+struct ConfigSettingContractEventsV0
+{
+    // Maximum size of events that a contract call can emit.
+    uint32 txMaxContractEventsSizeBytes;
+    // Fee for generating 1KB of contract events.
+    int64 feeContractEvents1KB;
+};
+
+// Bandwidth related data settings for contracts.
+// We consider bandwidth to only be consumed by the transaction envelopes, hence
+// this concerns only transaction sizes.
+struct ConfigSettingContractBandwidthV0
+{
+    // Maximum sum of all transaction sizes in the ledger in bytes
+    uint32 ledgerMaxTxsSizeBytes;
+    // Maximum size in bytes for a transaction
+    uint32 txMaxSizeBytes;
+
+    // Fee for 1 KB of transaction size
+    int64 feeTxSize1KB;
+};
+
+enum ContractCostType {
+    // Cost of running 1 wasm instruction
+    WasmInsnExec = 0,
+    // Cost of growing wasm linear memory by 1 page
+    WasmMemAlloc = 1,
+    // Cost of allocating a chunk of host memory (in bytes)
+    HostMemAlloc = 2,
+    // Cost of copying a chunk of bytes into a pre-allocated host memory
+    HostMemCpy = 3,
+    // Cost of comparing two slices of host memory
+    HostMemCmp = 4,
+    // Cost of a host function dispatch, not including the actual work done by
+    // the function nor the cost of VM invocation machinery
+    DispatchHostFunction = 5,
+    // Cost of visiting a host object from the host object storage. Exists to
+    // ensure some baseline cost coverage, i.e. repeatedly visiting objects
+    // by the guest will always incur some charges.
+    VisitObject = 6,
+    // Cost of serializing an xdr object to bytes
+    ValSer = 7,
+    // Cost of deserializing an xdr object from bytes
+    ValDeser = 8,
+    // Cost of computing the sha256 hash from bytes
+    ComputeSha256Hash = 9,
+    // Cost of computing the ed25519 pubkey from bytes
+    ComputeEd25519PubKey = 10,
+    // Cost of accessing an entry in a Map.
+    MapEntry = 11,
+    // Cost of accessing an entry in a Vec
+    VecEntry = 12,
+    // Cost of verifying ed25519 signature of a payload.
+    VerifyEd25519Sig = 13,
+    // Cost of reading a slice of vm linear memory
+    VmMemRead = 14,
+    // Cost of writing to a slice of vm linear memory
+    VmMemWrite = 15,
+    // Cost of instantiating a VM from wasm byte code.
+    VmInstantiation = 16,
+    // Cost of instantiating a VM from a cached state.
+    VmCachedInstantiation = 17,
+    // Cost of invoking a function on the VM. If the function is a host function,
+    // additional cost will be covered by `DispatchHostFunction`.
+    InvokeVmFunction = 18,
+    // Cost of computing a keccak256 hash from bytes.
+    ComputeKeccak256Hash = 19,
+    // Cost of computing an ECDSA secp256k1 pubkey from bytes.
+    ComputeEcdsaSecp256k1Key = 20,
+    // Cost of computing an ECDSA secp256k1 signature from bytes.
+    ComputeEcdsaSecp256k1Sig = 21,
+    // Cost of recovering an ECDSA secp256k1 key from a signature.
+    RecoverEcdsaSecp256k1Key = 22,
+    // Cost of int256 addition (`+`) and subtraction (`-`) operations
+    Int256AddSub = 23,
+    // Cost of int256 multiplication (`*`) operation
+    Int256Mul = 24,
+    // Cost of int256 division (`/`) operation
+    Int256Div = 25,
+    // Cost of int256 power (`exp`) operation
+    Int256Pow = 26,
+    // Cost of int256 shift (`shl`, `shr`) operation
+    Int256Shift = 27
+};
+
+struct ContractCostParamEntry {
+    // use `ext` to add more terms (e.g. higher order polynomials) in the future
+    ExtensionPoint ext;
+
+    int64 constTerm;
+    int64 linearTerm;
+};
+
+struct StateExpirationSettings {
+    uint32 maxEntryExpiration;
+    uint32 minTempEntryExpiration;
+    uint32 minPersistentEntryExpiration;
+    uint32 autoBumpLedgers;
+
+    // rent_fee = wfee_rate_average / rent_rate_denominator_for_type
+    int64 persistentRentRateDenominator;
+    int64 tempRentRateDenominator;
+
+    // max number of entries that emit expiration meta in a single ledger
+    uint32 maxEntriesToExpire;
+
+    // Number of snapshots to use when calculating average BucketList size
+    uint32 bucketListSizeWindowSampleSize;
+
+    // Maximum number of bytes that we scan for eviction per ledger
+    uint64 evictionScanSize;
+
+    // Lowest BucketList level to be scanned to evict entries
+    uint32 startingEvictionScanLevel;
+};
+
+struct EvictionIterator {
+    uint32 bucketListLevel;
+    bool isCurrBucket;
+    uint64 bucketFileOffset;
+};
+
+// limits the ContractCostParams size to 20kB
+const CONTRACT_COST_COUNT_LIMIT = 1024;
+
+typedef ContractCostParamEntry ContractCostParams<CONTRACT_COST_COUNT_LIMIT>;
+
+// Identifiers of all the network settings.
+enum ConfigSettingID
+{
+    CONFIG_SETTING_CONTRACT_MAX_SIZE_BYTES = 0,
+    CONFIG_SETTING_CONTRACT_COMPUTE_V0 = 1,
+    CONFIG_SETTING_CONTRACT_LEDGER_COST_V0 = 2,
+    CONFIG_SETTING_CONTRACT_HISTORICAL_DATA_V0 = 3,
+    CONFIG_SETTING_CONTRACT_EVENTS_V0 = 4,
+    CONFIG_SETTING_CONTRACT_BANDWIDTH_V0 = 5,
+    CONFIG_SETTING_CONTRACT_COST_PARAMS_CPU_INSTRUCTIONS = 6,
+    CONFIG_SETTING_CONTRACT_COST_PARAMS_MEMORY_BYTES = 7,
+    CONFIG_SETTING_CONTRACT_DATA_KEY_SIZE_BYTES = 8,
+    CONFIG_SETTING_CONTRACT_DATA_ENTRY_SIZE_BYTES = 9,
+    CONFIG_SETTING_STATE_EXPIRATION = 10,
+    CONFIG_SETTING_CONTRACT_EXECUTION_LANES = 11,
+    CONFIG_SETTING_BUCKETLIST_SIZE_WINDOW = 12,
+    CONFIG_SETTING_EVICTION_ITERATOR = 13
+};
+
+union ConfigSettingEntry switch (ConfigSettingID configSettingID)
+{
+case CONFIG_SETTING_CONTRACT_MAX_SIZE_BYTES:
+    uint32 contractMaxSizeBytes;
+case CONFIG_SETTING_CONTRACT_COMPUTE_V0:
+    ConfigSettingContractComputeV0 contractCompute;
+case CONFIG_SETTING_CONTRACT_LEDGER_COST_V0:
+    ConfigSettingContractLedgerCostV0 contractLedgerCost;
+case CONFIG_SETTING_CONTRACT_HISTORICAL_DATA_V0:
+    ConfigSettingContractHistoricalDataV0 contractHistoricalData;
+case CONFIG_SETTING_CONTRACT_EVENTS_V0:
+    ConfigSettingContractEventsV0 contractEvents;
+case CONFIG_SETTING_CONTRACT_BANDWIDTH_V0:
+    ConfigSettingContractBandwidthV0 contractBandwidth;
+case CONFIG_SETTING_CONTRACT_COST_PARAMS_CPU_INSTRUCTIONS:
+    ContractCostParams contractCostParamsCpuInsns;
+case CONFIG_SETTING_CONTRACT_COST_PARAMS_MEMORY_BYTES:
+    ContractCostParams contractCostParamsMemBytes;
+case CONFIG_SETTING_CONTRACT_DATA_KEY_SIZE_BYTES:
+    uint32 contractDataKeySizeBytes;
+case CONFIG_SETTING_CONTRACT_DATA_ENTRY_SIZE_BYTES:
+    uint32 contractDataEntrySizeBytes;
+case CONFIG_SETTING_STATE_EXPIRATION:
+    StateExpirationSettings stateExpirationSettings;
+case CONFIG_SETTING_CONTRACT_EXECUTION_LANES:
+    ConfigSettingContractExecutionLanesV0 contractExecutionLanes;
+case CONFIG_SETTING_BUCKETLIST_SIZE_WINDOW:
+    uint64 bucketListSizeWindow<>;
+case CONFIG_SETTING_EVICTION_ITERATOR:
+    EvictionIterator evictionIterator;
+};
+}
diff --git a/core/cap-0046-10.md b/core/cap-0046-10.md
index 2a47fada0..cd85a1c23 100644
--- a/core/cap-0046-10.md
+++ b/core/cap-0046-10.md
@@ -35,14 +35,12 @@ The metered costs must align closely to the true costs of running a smart contra
In addition, metering must have:
- High coverage: metering needs to cover all the non-trivial work done by the host.
-- Metering needs to err on the side of worst case of the true cost.
-- Metering based on the worst case must not deviate too far (10x) from the average cost.
+- Moderate overestimate: metering needs to err on the side of the worst case of the true cost, but should not be too far (within the same order of magnitude) from the average true cost.

### Design goals
-- Explainability – the metering model should be simple enough to understand and to explain the cost composition of a contract.
+- Simplicity – the metering model should be simple enough to understand. The cost composition should be easy to explain and reason about.
- Extensibility and maintainability – should be straightforward to add metering to future code. Changes in the implementation should not require rewrite of metering. Every iteration of code changes should not require complete model re-calibration.
-- Metering should be cheap – the act of meter charging should not amount to a significant cost.
-- Being able to detect when metering is missing in code paths.
+- Efficiency – the metering model should enable a succinct implementation in the host that can be executed efficiently.

### Goals alignment
Aligns with the general goals of the overview [cap-0046](./cap-0046.md) as well as the fee model [cap-0046-07](./cap-0046-07.md).
@@ -67,9 +65,8 @@ Components and blocks may be wild or tame:
- Code is **tame** if it’s code we wrote or are maintaining a fork of.

### Requirements for a component
-1. Depends on a single input.
-2. Independent from other components.
-3. The cost of each resource type - `cpu_insns` and `mem_bytes` - follows a linear or constant characteristics w.r.t. the input.
+1. Can be modeled as a constant or linear function w.r.t. a single input, on both resource types `cpu_insns` and `mem_bytes`.
+2. Does not invoke another component, i.e. components are the leaves of a call tree.

![Call tree diagram](../contents/cap-0046/0010/Call-tree-diagram.jpg)

@@ -77,11 +74,11 @@
Consider the host code as a tree of called blocks and components (see figure 1), with the entrypoint at the root, blocks as interior nodes and components as leaves of the tree.
We structure the host in such a way that ensures as an **invariant** that **every component in the call tree is metered on every path to it**. This is done by ensuring the following:
-- Blocks consist only of trivial (un-metered) code, calls to components, and calls to other blocks.
+- Blocks consist only of trivial (no need to meter) code, calls to components, and calls to other blocks.
- Every piece of wild component is converted to a tame component, tracked by the cost model with a unique code number assigned to it.
- Components are standalone and do not call other blocks or components — they are truly the leaves of the tree.

-The full list of component types are defined in `enum ContractCostType`, see "XDR changes".
+The full list of component types is defined in `enum ContractCostType`, see [XDR changes](#xdr-changes).

Once the call-tree invariant is satisfied, we can ensure that if every single component is metered, the entire call-tree is metered.
@@ -94,38 +91,40 @@ To obtain the parameters, we isolate the component and set up a benchmark sandbo
The result of calibration per resource type is a set of cost parameters of size `C x 2`, where `C` is the number of cost types. The cost parameters per resource type form a `ConfigSettingEntry`.
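To make the constant-or-linear cost model concrete, here is a minimal sketch (hypothetical names, not the actual `soroban-env` API) of how a component's charge for one resource type could be evaluated from its `constTerm`/`linearTerm` parameters and a single input:

```rust
/// Illustrative sketch only; mirrors `ContractCostParamEntry` (constTerm, linearTerm).
#[derive(Clone, Copy)]
struct CostParams {
    const_term: i64,
    linear_term: i64,
}

/// Evaluates the constant-or-linear model for one component and one resource:
/// cost(input) = const_term + linear_term * input.
fn model_cost(p: CostParams, input: u64) -> u64 {
    // Saturating arithmetic keeps the charge well-defined even on overflow.
    (p.const_term as u64).saturating_add((p.linear_term as u64).saturating_mul(input))
}

fn main() {
    // Hypothetical calibration result for a single cost type (e.g. `ValSer`):
    // 500 insns of fixed overhead plus 10 insns per input byte.
    let val_ser_cpu = CostParams { const_term: 500, linear_term: 10 };
    // Charge for serializing a 1024-byte value.
    assert_eq!(model_cost(val_ser_cpu, 1024), 500 + 10 * 1024u64);
}
```

The same evaluation applies to both `cpu_insns` and `mem_bytes`, each with its own parameter set.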
### The budget
-The budget for each resource type is a `ConfigSettingEntry` that is determined in consensus by the validators. The budget reflects the ledger processing capacity in accordance to the requirements in the "Requirements" section. We can start with an initial `cpu_insns` budget of 4'000'000 and `mem_bytes` of 10MB. These numbers may change before this CAP finalizes.
+The budget for each resource type is a `ConfigSettingEntry` that is determined in consensus by the validators. The budget reflects the ledger processing capacity in accordance with the requirements in the [Requirements](#requirements) section. We can start with an initial `cpu_insns` budget of 2'500'000 and `mem_bytes` of 2MiB.

-At every metering charging, the total charges will be compared with the budget, and if exceeds, will result in a `SCEC_EXCEEDED_LIMIT` host error.
+At every metering charge, the cumulative resource consumption will be compared with the budget, and if exceeded, will result in a `SCEC_EXCEEDED_LIMIT` host error.

### XDR changes
-See [cap-0046 Overview](./cap-0046-01.md), specifically the `ConfigSettingEntry` which has four new additions corresponding to budget and metering for each resource type, as well as the new file [Stellar-contract-cost-type.x](../contents/cap-0046/Stellar-contract-cost-type.x) that defines the cost types `ContractCostType` and cost parameters entry `ContractCostParamEntry`.
+See [cap-0046 Overview](./cap-0046-01.md) and [Stellar-contract-config-setting.x](../contents/cap-0046/Stellar-contract-config-setting.x) for the XDR changes. In particular `ConfigSettingEntry`
+contains new entries for budget and metering. `ContractCostType` enumerates all cost component types, with their explanations in the comments, and `ContractCostParamEntry` holds the calibrated cost parameters.

### Metering an arbitrary new piece of code
-The above have so far presented the definition of components, the list of components already identified in the host and how to calibrate each component to obtain the cost parameters.
-
-The main challenge of dealing with an arbitrary new piece of code (what the host starts out to be) is to identify the components through an iterative process:
+The main challenge of dealing with an arbitrary new piece of code (*wild* or *tame*) is to identify the components through an iterative process:
1. Break down the code into a call tree where each node consists of a meaningful, non-trivial operation.
-2. Identify the leaf nodes, making sure they are components according to the "requirements for a component".
-3. For any TC, meter it according to "metering a component"
-4. If it contains any wild code, follow "taming wild code" to tame it. This step needs to be done in junction with 3.
+2. Identify the leaf nodes, making sure they are components according to the [requirements for a component](#requirements-for-a-component).
+3. For any *tame* component, meter it according to [metering a component](#metering-a-component).
+4. If it contains any *wild* code, follow [taming wild code](#taming-wild-code) to tame it. This step needs to be done in conjunction with 3.
5. Start from the leaf nodes, mark them as metered, then proceed up level by level until reaching the root.
-- If a node is composed of only metered children, it is a metered block.
-- Once the root is metered, the call-tree invariant is satisfied and the entire call-tree is metered.
+
+If a node is composed of only metered children, it is a metered block. Once the root is metered, the call-tree invariant is satisfied and the entire call-tree is metered.
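For illustration, the following minimal sketch (hypothetical types, not the actual `soroban-env` implementation) shows how cumulative charges could be tracked against the budget, failing with an error analogous to `SCEC_EXCEEDED_LIMIT` once either resource limit is exceeded:

```rust
/// Illustrative sketch only: per-transaction budget with cumulative charges.
struct Budget {
    cpu_limit: u64,
    mem_limit: u64,
    cpu_charged: u64,
    mem_charged: u64,
}

/// Stand-in for the `SCEC_EXCEEDED_LIMIT` host error described above.
#[derive(Debug)]
struct ExceededLimit;

impl Budget {
    /// Records the charge for one metered component call and fails as soon as
    /// either cumulative total exceeds its limit.
    fn charge(&mut self, cpu: u64, mem: u64) -> Result<(), ExceededLimit> {
        self.cpu_charged = self.cpu_charged.saturating_add(cpu);
        self.mem_charged = self.mem_charged.saturating_add(mem);
        if self.cpu_charged > self.cpu_limit || self.mem_charged > self.mem_limit {
            Err(ExceededLimit)
        } else {
            Ok(())
        }
    }
}

fn main() {
    // Limits taken from the initial budget suggested above (2'500'000 insns, 2MiB).
    let mut budget = Budget {
        cpu_limit: 2_500_000,
        mem_limit: 2 * 1024 * 1024,
        cpu_charged: 0,
        mem_charged: 0,
    };
    assert!(budget.charge(1_000_000, 4096).is_ok());
    assert!(budget.charge(2_000_000, 0).is_err()); // exceeds the cpu budget
}
```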
### Taming wild code
-As mentioned previously, one of the keys to satisfying the call-tree invariant is that all wild code, blocks or components, be tamed. This consists of the following patterns
-1. A tamed block (TB) calling a tamed component (TC)
-2. A wild block (WB) calling a TC
-3. A TB calling a wild component (WC)
-4. A TB calling a wild block (WB), where the wild block (WB) calls some other WC which we do not have access to.
-For 1 and 2, metering is already covered by the TC and there is nothing else we need to do.
+As mentioned previously, one of the keys to satisfying the call-tree invariant is that all *wild* code, blocks or components, be tamed. A piece of *wild* code can appear in one of the following patterns:
+1. A single wild component (**WC**)
+2. A wild block (**WB**) that only consists of tamed blocks (**TB**s) and tamed components (**TC**s)
+3. A WB that consists of a mixture of TCs (recall a TB is just a combination of TCs) and WCs which we do not have access to.
+4. A WB that consists of several WCs
+
+For 1, we are calling a WC which is standalone and does not call us back. We can easily tame the WC by defining it as a metered component following [metering a component](#metering-a-component).

-For 3, we are calling a WC which is standalone and does not call us back. We can easily tame the WC by attaching a metering harness to it.
+For 2, metering is already covered by the tamed code and there is nothing else we need to do.

-The tricky scenario is 4, where a TB calls into a WB that calls into a mixture of WCs and TCs (if all of them are WCs, then the entire WB becomes a WC and we are in scenario 3). We have two options to deal with this scenario:
-1. Approximate the WB as a new WC, using proper assumptions to separate out all of its logic dependencies from any TCs. Figure 2 illustrates this process and compares the call tree before and after.
-2. If 1 is not possible, we have to tame it the brute force way either by forking the code and modifying it, or choose a different library, or remove this functionality altogether.
+For scenario 3, we first try to approximate the WB as pure wild code, i.e. by minimizing the footprint of the TCs. Concretely this means setting up the calibration samples (e.g. making `x = 0` in the linear function) such that the TCs have minimal effect on the output resource consumption. If this is possible, we end up in scenario 4. See figure 2 below for illustration.
+
+For scenario 4, we first approximate the WB as a single WC, by picking a single dominant input and calibrating it as a linear function. If it works, we end up back in scenario 1 and we are done.
+
+If either 3 or 4 fails, then we have to tame it the brute force way, either by forking the code and modifying it, or choosing a different library, or removing our dependency on it altogether.

![Taming wild code](../contents/cap-0046/0010/Taming-a-call-tree.jpg)

@@ -135,100 +134,56 @@
We use cpu instruction count as the main metric for "compute" because it is a direct proxy to process running time, i.e. `run_time = cpu_insns_count / clock_freq / ave_insns_per_cycle`. The average instructions per cycle `ave_insns_per_cycle` depends on a set of CPU architecture-specific factors such as the instruction set, instruction length, micro-ops, instruction-level parallelism (which depends on instruction window size, branch-prediction), which are stable per architecture.
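As a back-of-the-envelope illustration of the formula above (the clock frequency and instructions-per-cycle figures are assumptions, not protocol parameters):

```rust
/// Converts a metered instruction count into an estimated wall-clock time,
/// per `run_time = cpu_insns_count / clock_freq / ave_insns_per_cycle`.
/// Illustrative only; the assumed hardware figures are not protocol values.
fn estimated_run_time_secs(cpu_insns_count: u64, clock_freq_hz: f64, ave_insns_per_cycle: f64) -> f64 {
    cpu_insns_count as f64 / clock_freq_hz / ave_insns_per_cycle
}

fn main() {
    // 4'000'000 metered instructions on an assumed 2GHz CPU retiring ~2
    // instructions per cycle works out to roughly 1ms.
    let t = estimated_run_time_secs(4_000_000, 2.0e9, 2.0);
    assert!((t - 0.001).abs() < 1e-12);
}
```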
-Assuming 2GHz cpu with an ave. insns per cycle of 2, 4'000'000 cpu instructions roughly equals 1ms.
+Assuming a 2GHz cpu with an avg. insns per cycle of 2, 1ms roughly equals 4'000'000 cpu instructions.

-Note that the instruction count may vary across architectures, but the metering model needs to be same across various archs, so we will need to provide a guidance on recommended setup for metering calibration.
+Note that the instruction count may vary across architectures, but the metering model needs to be the same, so the metering model needs to produce an upper bound across all viable architectures.

-Another considered alternative resource is execution time, which relates much closer to the actual cost in ledger closing time. However, execution time is much more volatile and less-deterministic, which make it a less desirable target metric for metering.
+Another considered alternative resource is execution time, which relates much closer to the actual cost in ledger closing time. However, execution time is much more volatile and less deterministic, which makes it a less desirable target metric for metering.

### Why `mem_bytes` metric
-The bytes of memory allocated is a good proxy of the memory footprint of contract execution. The majority of the smart contract memory footprint comes from 1. a fixed-sized linear memory 2. immutable host objects created during contract execution, and both of these are not freed until the end of contract execution. This memory model is very similar to the arena allocator. Using allocated memory as the metric is an worst-case approximation that is 1. close to the actual memory cost 2. gives us flexibility to switch to an actual arena allocator which would make it the actual cost.
-
-
+The bytes of memory allocated is a good proxy of the memory footprint of contract execution. The majority of the smart contract memory footprint comes from 1. a fixed-sized linear memory 2. immutable host objects created during contract execution, and both of these are not freed until the end of contract execution. This memory model is very similar to the arena allocator. Using allocated memory as the metric is a worst-case approximation that is 1. close to the actual memory cost 2. gives us flexibility to switch to an actual arena allocator later.

### Why do we have to model the costs?
-In other words, why can't we profile the contract at runtime and use the results directly for metering? Because the profiling results are non-deterministic and 1. we can't use them for consensus 2. the contract execution outcome won't be able to be replayed bit-identically. Using an analytical model ensure determinism for consensus and replayability (more on this later).
+In other words, why can't we measure and use the runtime resource consumption for metering? Because the profiling results are non-deterministic and 1. we can't use them for consensus 2. the contract execution outcome won't be able to be replayed bit-identically. Using an analytical model ensures determinism for consensus and replayability (more on this later).

### Why linear and constant components only?
-The obvious reason is simplicity. We want the costs to follow a simple linear characteristic such that we can fit it accurately without needing a complex numerical model (and fitting process, heuristics etc).
-
-A model with higher order dependencies also risk the worst-case costs significantly outweighing the average, and any small deviation in the input resulting in significant over or underestimation of the costs. This goes against the design goals.
+Simplicity. We want the costs to follow a simple linear characteristic such that we can fit it accurately without needing a complex numerical model (and fitting process, heuristics etc).
+A model with higher order dependencies also risks the worst-case costs significantly outweighing the average, with any small deviation in the input resulting in significant over- or underestimation of the costs. This goes against the [design goals](#design-goals).

### Host vs WASM vm
-This metering framework is generic and does not differentiate between the host and the WASM vm. Both the host and the vm are treated as components and blocks defined in the "specification" section and subject to the same metering procedures.
-
-Our current choice of the WASM virtual machine implementation is Wasmi, which is a lightweight interpreter of the wasm standard, written in the same language (Rust) as the host. Wasmi runs an inner interpreter loop that executes a single wasm instruction on each loop. Thus every wasm instruction logic fits the requirements of a component. `WasmInsnExecT0~4` in `ContractCostType` are designated for the wasm instructions (instead of having one type designated to each of the 100+ wasm instructions, we group them into tiers 0~4 where each tier of wasm instructions costs relatively the same amount of cpu insns).
-
-We maintain a fork of Wasmi with metering added. This makes Wasmi is a tamed "wild component".
-
-(Note this does not mean we are tied to a particular wasm implementation, it's just an example. If we decide to switch to a different interpreter or JIT in the future, we will be able to apply the same procedure to derive a new set of metering components.)
+This metering framework is generic and does not differentiate between the host and the WASM vm. Both the host and the vm are treated as components and blocks defined in the [specification](#specification) section and subject to the same metering procedures.
+Our current choice of the WASM virtual machine implementation is Wasmi, which is a lightweight interpreter of the wasm standard, written in the same language (Rust) as the host. Wasmi runs an inner interpreter loop that executes a single wasm instruction on each loop. Thus the logic of every wasm instruction fits the requirements of a component. `WasmInsnExec` in `ContractCostType` is designated for the wasm instructions.

### Relation to cap-0046-07 (fee model)
-[CAP-0046-07](./cap-0046-07.md) proposed a fee model for smart contracts taking into account ledger access, storage and computation (or "gas"). This CAP details the computation aspect. However, this proposal identifies cpu and memory as separate aspects of the compute cost that needs to be budgeted separately. This difference needs to be resolved before this CAP finalize, i.e., either expand gas network settings in 07 or consolidate the `cpu_insns` and `mem_bytes` into a single "gas" parameter in here.
+[CAP-0046-07](./cap-0046-07.md) proposed a fee model for smart contracts taking into account ledger access, storage and computation. This CAP details the computation aspect, which includes cpu and memory. The metered `cpu_insns` goes into the fee model as input to the "compute" fee. While `mem_bytes` is not part of the fee model, it is subject to the network limit.

### Cost estimation
-This proposal relies on the "preflight" mechanism to provide users with cost estimation of a transaction. The total costs for each resource type as well as inputs to each individual cost type will be returned from the preflight simulation. These costs, however can only serve as guidance to the actual cost, since the ledger snapshot used for preflight may be outdated. Thus it is not guaranteed that a transaction staying below the budget during preflight will not exceed it during the actual run.
+This proposal relies on the "preflight" mechanism to provide an estimation of the cpu and mem consumption in a transaction. These can only serve as guidance on the actual cost, since the ledger snapshot used for preflight may be outdated, and the logic may differ between the preflight ("recording") and actual ("enforcing") modes. Thus it is not guaranteed that a transaction staying below the budget during preflight will not exceed it during the actual run.

-## Parameters Upgrade
-Both the budget and metering parameters are stored on the ledger as `ConfigLedgerEntry` and their upgrade and validation process have been discussed in [CAP-0046-09](./cap-0046-09.md). In general, the parameters can be upgraded with or without a protocol version upgrade.
+### Config Settings Upgrade
+Both the budget and metering parameters are stored on the ledger as `ConfigSettingEntry` and their upgrade and validation process has been discussed in [CAP-0046-09](./cap-0046-09.md). In general, the settings can be upgraded with or without a protocol version upgrade.

-In the case of a protocol version upgrade, here are the scenarios where the parameters also has to be upgraded:
-- New blocks have been introduced in the host that require introducing new components. Such changes include e.g. a new crypto primitive function. Note that if a new block merely consists of trivial code and calling existing components, then it has no effect on metering and no upgrade is needed.
-- Changes on the host components, or version changes in its dependencies (e.g. Rust) that result in observable difference in components' cost characteristics. In rare cases, if the cost characteristics becomes no longer linear, then the component needs to be broken down into finer sub-components. See "Taming wild code" section above.
+In the case of a protocol version upgrade, here are the scenarios that also require a settings upgrade:
+- New blocks have been introduced in the host that require introducing new components. Such changes include e.g. a new crypto primitive function. Note that if a new block merely consists of trivial code and calls to existing components, then no settings upgrade is needed.
+- Changes to the host components, or version changes in their dependencies, that result in an observable difference in components' cost characteristics. In rare cases, if the cost characteristic is no longer linear, the component needs to be broken down into finer sub-components. See [Taming wild code](#taming-wild-code).

### The “metered” stamp
We may need to introduce a new mechanism for stamping the metered entities in the host, following the definitions of wild/tamed components/blocks outlined in the previous section. Such a mechanism would help us ensure the call-tree invariant is satisfied by examining the root block. A further mechanism to automatically detect if metering is missing on a path would be even more ideal.
We will also need to introduce a set of reviewing standards that differentiate between block vs component changes.
A metered component is subject to significantly higher bars for review and audit, to make sure the component criteria are truly satisfied, as they are the foundational building blocks of the budget metering framework.

-## Open Issues
+In the future we may add tooling around ensuring metering coverage and assisting with updating parameters or adding new metered components.

### Maintainability
The cost parameters need to be maintained to prevent the metering model from gradually deviating from reality (model drift). Even if the host itself stays unchanged, its dependencies may change in ways that result in small performance differences, which can accumulate over time and cause the cost models to drift. To combat that, we will need to publish a set of specs where the metering calibration benchmark needs to be run regularly, along with a suite of tests and criteria for determining when the model parameters need to be updated.

-### Versioning and Replayability
-
-Although the metering models are deterministic, the model inputs may vary across different software versions. For example, consider a third-party library routine that calls our host object comparison component `obj_compare` for an unknown number of times. The metering of that routine is therefore delegated to `obj_compare`. If a software upgrade happens to the routine which results in the number of `obj_compare` call to be increased from `N` to `N+1`, the cost will be different (which may effect the success-or-failure status of a contract) even though no other observable difference exists. In other words, due to the intricate relations between metering logic and the code logic under execution, the surface area of observable differences between a transaction's execution and its replay have been enlarged.
-
-This is not a problem for consensus, as long as all the validators maintain the exact same software version. There are two options to solve the replay problem:
-1. Maintain multiple software versions simultaneously. For an old protocol version, its exact host software version needs to be included in the current stellar-core. A version map between protocol version and the host software version needs to be maintained and looked up during replay. In practice, the number of software versions could be less than the number of protocol versions, since a protocol version upgrade may not result in observable differences in any of the transactions' replay between the old and the new version. In which case, the older software version can be retired and replaced by the newer version, but this is more of an exception.
-2. Make the cost results irrelevant in replay. In other words, relax the bit-identicalness requirement for contract execution costs. During replay:
-   - On a successful SC transaction, take the fee due to contract execution (cpu and memory costs) as the "truth" (in order to produce the correct hash), and ignore the metering logic which arrive to those results. Also set the budget to unlimited so the replay transaction cannot fail due to out of budget.
-   - On a failed SC transaction, skip the transaction. A failed SC transaction ought to not have any side effects, so that it is safe to be skipped.
-
-The main pro of option 1 is that it preserves the bit-identicalness property of replay, however, at the cost of increased maintenance burden.
-The rationale for option 2, besides easier software maintenance, is that the accounting logic of metering should not have any significance besides arriving at the success-or-failure status and the fee charged of a contract transaction. In other words, no other side effects should be produced as a result of metering that is relevant to the observable outcomes of a transaction, thus justifies the choice of skipping the metering process altogether during replay.
-
-However, there are several cons of option 2:
-- Adds the limitation that metering cannot produce any side effect besides the execution cost numbers, which must be the end results in all current and future transactions. This prohibits the possibility that a contract transaction relies on intermediate execution cost results as part of its logic, such as deciding whether or not to call another contract based on how much budget it has remaining.
-- Places a dependency of replay on the transaction results.
-- The budget metering is just a special case in the broader issue of host software versioning. Even without budget metering, the surface area of potential differences between a live execution and its replay is already large and unpredictable, thus necessitates host multi-versioning. The budget metering just increases such surface area.
-
-Based on above concerns, option 1 is likely the preferred option.
-
-The broader issue of host software versioning will be discussed in a different chapter and must be finalized before this CAP finalizes.

## Security Concerns
Missed or inaccurate metering can cause security concerns in two aspects:
- **Denial of Service**: if the computed costs significantly underestimate the true cost of running a contract, this can slow down the validators and prevent them from closing the ledger in an acceptable time frame.
- **Under-Utilization of the Ledger Capacity**: this is not a direct attack per se. However, a side effect of overestimation in metering is that the ledger could be filled with many (deliberately crafted) fast contract transactions which theoretically could require more resources in the worst case, causing the ledger to be under-utilized. This may in turn cause other (important) transactions to queue up and not make it into the ledger in a reasonable time.

## Implementation
-The budget and metering, calibration has been implemented in the host, primarily:
-- [PR 118](https://github.com/stellar/rs-soroban-env/pull/118) contains the initial budget and metering framework
-- [PR 307](https://github.com/stellar/rs-soroban-env/pull/307) more comprehensive coverage of metering
-- [PR 561](https://github.com/stellar/rs-soroban-env/pull/561) adds the calibration framework
-- [PR 597](https://github.com/stellar/rs-soroban-env/pull/597) calibration for wasm instructions
-
-in Wasmi (our fork of the Wasm interpreter):
-- [PR 1](https://github.com/stellar/wasmi/pull/1)
-- [PR 10](https://github.com/stellar/wasmi/pull/10)
-
-and in the sdk:
-- [PR 789](https://github.com/stellar/rs-soroban-sdk/pull/789)
-
-The stellar-core side implementation has not been done yet.
\ No newline at end of file
+Metering, budget and calibration have been implemented in [soroban-env](https://github.com/stellar/rs-soroban-env). Related integration work (such as the config settings) has been done in stellar-core and [soroban-sdk](https://github.com/stellar/rs-soroban-sdk).
\ No newline at end of file