-
Notifications
You must be signed in to change notification settings - Fork 11.7k
[RPC] VM error visibility indexing #26595
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
ce7ef8a
c32b90f
c55ac5f
ab0c0e3
075964c
c296ef6
c165a1c
06842bb
05c92a5
56d1e3d
4306021
15ef267
0ed0ba6
ddcdc1c
d8d0590
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -139,7 +139,7 @@ use sui_types::effects::{ | |
| InputConsensusObject, SignedTransactionEffects, TransactionEffects, TransactionEffectsAPI, | ||
| TransactionEvents, VerifiedSignedTransactionEffects, | ||
| }; | ||
| use sui_types::error::{ExecutionError, SuiErrorKind, UserInputError}; | ||
| use sui_types::error::{ExecutionError, ExecutionErrorContext, SuiErrorKind, UserInputError}; | ||
| use sui_types::event::EventID; | ||
| use sui_types::executable_transaction::VerifiedExecutableTransaction; | ||
| use sui_types::execution_status::ExecutionErrorKind; | ||
|
|
@@ -1461,7 +1461,7 @@ impl AuthorityState { | |
| certificate: &VerifiedExecutableTransaction, | ||
| mut execution_env: ExecutionEnv, | ||
| epoch_store: &Arc<AuthorityPerEpochStore>, | ||
| ) -> ExecutionOutput<(TransactionEffects, Option<ExecutionError>)> { | ||
| ) -> ExecutionOutput<(TransactionEffects, Option<ExecutionErrorContext>)> { | ||
| let _scope = monitored_scope("Execution::try_execute_immediately"); | ||
| let _metrics_guard = self.metrics.internal_execution_latency.start_timer(); | ||
|
|
||
|
|
@@ -1640,7 +1640,10 @@ impl AuthorityState { | |
| &self, | ||
| certificate: &VerifiedCertificate, | ||
| execution_env: ExecutionEnv, | ||
| ) -> (VerifiedSignedTransactionEffects, Option<ExecutionError>) { | ||
| ) -> ( | ||
| VerifiedSignedTransactionEffects, | ||
| Option<ExecutionErrorContext>, | ||
| ) { | ||
| let epoch_store = self.epoch_store_for_testing(); | ||
| let (effects, execution_error_opt) = self | ||
| .try_execute_immediately( | ||
|
|
@@ -1658,7 +1661,10 @@ impl AuthorityState { | |
| &self, | ||
| executable: &VerifiedExecutableTransaction, | ||
| execution_env: ExecutionEnv, | ||
| ) -> (VerifiedSignedTransactionEffects, Option<ExecutionError>) { | ||
| ) -> ( | ||
| VerifiedSignedTransactionEffects, | ||
| Option<ExecutionErrorContext>, | ||
| ) { | ||
| let epoch_store = self.epoch_store_for_testing(); | ||
| let (effects, execution_error_opt) = self | ||
| .try_execute_immediately(executable, execution_env, &epoch_store) | ||
|
|
@@ -1734,7 +1740,7 @@ impl AuthorityState { | |
| ) -> ExecutionOutput<( | ||
| TransactionOutputs, | ||
| Vec<ExecutionTiming>, | ||
| Option<ExecutionError>, | ||
| Option<ExecutionErrorContext>, | ||
| )> { | ||
| let _scope = monitored_scope("Execution::process_certificate"); | ||
| let tx_digest = *certificate.digest(); | ||
|
|
@@ -1905,40 +1911,69 @@ impl AuthorityState { | |
| rewritten_inputs: Option<Vec<bool>>, | ||
| signer: SuiAddress, | ||
| tx_digest: TransactionDigest, | ||
| is_fullnode: bool, | ||
| ) -> ( | ||
| InnerTemporaryStore, | ||
| SuiGasStatus, | ||
| TransactionEffects, | ||
| Vec<ExecutionTiming>, | ||
| Result<(), ExecutionError>, | ||
| Result<(), ExecutionErrorContext>, | ||
| ) { | ||
| let (inner_temp_store, gas_status, effects, timings, execution_error) = executor | ||
| // TODO only run this function on FullNodes, use `execute_transaction_to_effects` on validators. | ||
| .execute_transaction_to_effects_and_execution_error( | ||
| store, | ||
| protocol_config, | ||
| self.metrics.execution_metrics.clone(), | ||
| enable_expensive_checks, | ||
| execution_params, | ||
| epoch_id, | ||
| epoch_timestamp_ms, | ||
| input_objects, | ||
| gas_data, | ||
| if is_fullnode { | ||
|
jordanjennings-mysten marked this conversation as resolved.
|
||
| let (inner_temp_store, gas_status, effects, timings, execution_error) = executor | ||
| .execute_transaction_to_effects_and_execution_error( | ||
| store, | ||
| protocol_config, | ||
| self.metrics.execution_metrics.clone(), | ||
| enable_expensive_checks, | ||
| execution_params, | ||
| epoch_id, | ||
| epoch_timestamp_ms, | ||
| input_objects, | ||
| gas_data, | ||
| gas_status, | ||
| kind, | ||
| rewritten_inputs, | ||
| signer, | ||
| tx_digest, | ||
| &mut None, | ||
| ); | ||
|
|
||
| ( | ||
| inner_temp_store, | ||
| gas_status, | ||
| kind, | ||
| rewritten_inputs, | ||
| signer, | ||
| tx_digest, | ||
| &mut None, | ||
| ); | ||
| effects, | ||
| timings, | ||
| execution_error, | ||
| ) | ||
| } else { | ||
| let (inner_temp_store, gas_status, effects, timings, execution_failure) = executor | ||
| .execute_transaction_to_effects( | ||
| store, | ||
| protocol_config, | ||
| self.metrics.execution_metrics.clone(), | ||
| enable_expensive_checks, | ||
| execution_params, | ||
| epoch_id, | ||
| epoch_timestamp_ms, | ||
| input_objects, | ||
| gas_data, | ||
| gas_status, | ||
| kind, | ||
| rewritten_inputs, | ||
| signer, | ||
| tx_digest, | ||
| &mut None, | ||
| ); | ||
|
|
||
| ( | ||
| inner_temp_store, | ||
| gas_status, | ||
| effects, | ||
| timings, | ||
| execution_error, | ||
| ) | ||
| ( | ||
| inner_temp_store, | ||
| gas_status, | ||
| effects, | ||
| timings, | ||
| execution_failure.map_err(ExecutionErrorContext::from), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it be worthwhile to split the extra metadata into its own distinct type? I.e. have The benefit (for me) of using separate types, is that I would be able to clearly see which part of the richer data is dropped in the validator path, because it would be statically This also fits the onward data flow better because the execution error would need to be indexed with the rest of the base transaction effect data, while the metadata goes into its own table.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would also recommend we try to align on the shape of the type using a protobuf message and storing the protobuf message itself in the DB so that we can iterate on the shape if need be.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @bmwill, is it possible to do that today with TideHunter/TypedStore? I was personally less worried about the evolvability because the metadata structure is already quite generic, but agree that if it's possible, then using a more generic protobuf value would be nicer for future evolvability.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. put prost and serialize and it seemed to work out, I expect we will add some protos somewhere at some point but for now just used the prost derive. |
||
| ) | ||
| } | ||
| } | ||
|
|
||
| /// execute_certificate validates the transaction input, and executes the certificate, | ||
|
|
@@ -1961,7 +1996,7 @@ impl AuthorityState { | |
| ) -> ExecutionOutput<( | ||
| TransactionOutputs, | ||
| Vec<ExecutionTiming>, | ||
| Option<ExecutionError>, | ||
| Option<ExecutionErrorContext>, | ||
| )> { | ||
| let _scope = monitored_scope("Execution::prepare_certificate"); | ||
| let _metrics_guard = self.metrics.prepare_certificate_latency.start_timer(); | ||
|
|
@@ -2025,7 +2060,7 @@ impl AuthorityState { | |
| let tracking_store = TrackingBackingStore::new(self.get_backing_store().as_ref()); | ||
|
|
||
| #[allow(unused_mut)] | ||
| let (inner_temp_store, _, mut effects, timings, execution_error_opt) = self | ||
| let (inner_temp_store, _, mut effects, timings, execution_error_result) = self | ||
|
jordanjennings-mysten marked this conversation as resolved.
Outdated
|
||
| .execute_transaction_to_effects( | ||
| &**epoch_store.executor(), | ||
| &tracking_store, | ||
|
|
@@ -2048,8 +2083,11 @@ impl AuthorityState { | |
| rewritten_inputs, | ||
| signer, | ||
| tx_digest, | ||
| self.is_fullnode(epoch_store), | ||
| ); | ||
|
|
||
| let execution_error_opt = execution_error_result.err(); | ||
|
|
||
| let object_funds_checker = self.object_funds_checker.load(); | ||
| if let Some(object_funds_checker) = object_funds_checker.as_ref() | ||
| && !object_funds_checker.should_commit_object_funds_withdraws( | ||
|
|
@@ -2159,6 +2197,10 @@ impl AuthorityState { | |
| &tracking_store.into_read_objects(), | ||
| ); | ||
|
|
||
| let execution_error_metadata = execution_error_opt | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's the possible size of this error metadata? If I recall correctly, this will be added onto fullnodes, I assume in a table somewhere. If it's very large, can it cause any disruption/issues during insertion/retrieval?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. my napkin math says about 1 gb of storage per week with 300 bytes per error, 2% error rate on ~20 million txs a day, depending on when the fullnode prunes it could be a bit smaller, maybe more in the 0.33gb range. based on a 5 hour sample window a few weeks back.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for sharing more concrete numbers. I was wondering if there's any limits in the DB of how much data can be inserted in a field in a column?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think they are quite large but you would hit performance problems at some point since its rocks db. |
||
| .as_ref() | ||
| .and_then(ExecutionErrorContext::metadata_with_source); | ||
|
|
||
| // index certificate | ||
| let _ = self | ||
| .post_process_one_tx(certificate, &effects, &inner_temp_store, epoch_store) | ||
|
|
@@ -2174,6 +2216,7 @@ impl AuthorityState { | |
| effects, | ||
| inner_temp_store, | ||
| unchanged_loaded_runtime_objects, | ||
| execution_error_metadata, | ||
| ); | ||
|
|
||
| let elapsed = prepare_certificate_start_time.elapsed().as_micros() as f64; | ||
|
|
@@ -2187,7 +2230,7 @@ impl AuthorityState { | |
| ); | ||
| } | ||
|
|
||
| ExecutionOutput::Success((transaction_outputs, timings, execution_error_opt.err())) | ||
| ExecutionOutput::Success((transaction_outputs, timings, execution_error_opt)) | ||
| } | ||
|
|
||
| pub fn prepare_certificate_for_benchmark( | ||
|
|
@@ -2209,7 +2252,10 @@ impl AuthorityState { | |
| epoch_store, | ||
| ) | ||
| .unwrap(); | ||
| Ok((transaction_outputs, execution_error_opt)) | ||
| Ok(( | ||
| transaction_outputs, | ||
| execution_error_opt.map(ExecutionError::from), | ||
| )) | ||
| } | ||
|
|
||
| #[instrument(skip_all)] | ||
|
|
@@ -2339,7 +2385,7 @@ impl AuthorityState { | |
| let execution_error_source = execution_result | ||
| .as_ref() | ||
| .err() | ||
| .and_then(|e| e.source().as_ref().map(|e| e.to_string())); | ||
| .and_then(|e| std::error::Error::source(e).map(|e| e.to_string())); | ||
|
jordanjennings-mysten marked this conversation as resolved.
Outdated
|
||
|
|
||
| let response = DryRunTransactionBlockResponse { | ||
| suggested_gas_price, | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.