diff --git a/.qoder/docs/jsonrpc-api-spec.json b/.qoder/docs/jsonrpc-api-spec.json new file mode 100644 index 0000000000..4d821d5bdf --- /dev/null +++ b/.qoder/docs/jsonrpc-api-spec.json @@ -0,0 +1,1353 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "VIZ Blockchain JSON-RPC API Specification", + "description": "Complete specification of all JSON-RPC methods exposed by VIZ node plugins. Intended as a machine-readable spec for API explorer generation.", + "version": "1.0.0", + "plugins": [ + { + "name": "validator_api", + "description": "Provides read-only access to validator (witness) data: schedules, votes, and validator registration info.", + "methods": [ + { + "method": "get_active_validators", + "description": "Returns the list of currently active validator account names that are participating in block production.", + "aliases": ["get_active_witnesses"], + "params": [], + "returns": { + "type": "array", + "items": { "type": "string" }, + "description": "Array of account names of currently active validators." + } + }, + { + "method": "get_validator_schedule", + "description": "Returns the current validator schedule object, including the shuffled list of validators and their timeshares.", + "aliases": ["get_witness_schedule"], + "params": [], + "returns": { + "type": "object", + "description": "The validator_schedule_object containing current_shuffled_validators, timeshare, and related scheduling data." + } + }, + { + "method": "get_validators", + "description": "Returns a list of validator objects by their database IDs. 
For each ID, returns either the validator_api_object or null if not found.", + "aliases": ["get_witnesses"], + "params": [ + { + "name": "validator_ids", + "caption": "Validator IDs", + "description": "Array of validator object database IDs to look up.", + "type": "array", + "items": { "type": "integer" }, + "required": true + } + ], + "returns": { + "type": "array", + "description": "Array of optional validator_api_object entries, one per requested ID." + } + }, + { + "method": "get_validator_by_account", + "description": "Returns the validator object registered under a specific account name, or null if the account is not a validator.", + "aliases": ["get_witness_by_account"], + "params": [ + { + "name": "account_name", + "caption": "Account Name", + "description": "The account name to look up as a validator.", + "type": "string", + "required": true + } + ], + "returns": { + "type": "object", + "description": "The validator_api_object for the account, or null if not a validator.", + "nullable": true + } + }, + { + "method": "get_validators_by_vote", + "description": "Returns validators sorted by total votes (descending). Starts from a given account name. Only returns validators with votes > 0. Maximum 100 results.", + "aliases": ["get_witnesses_by_vote"], + "params": [ + { + "name": "from", + "caption": "From Account", + "description": "The account name to start from. Use empty string to start from the top.", + "type": "string", + "required": true + }, + { + "name": "limit", + "caption": "Limit", + "description": "Maximum number of results to return. Must not exceed 100.", + "type": "integer", + "required": true, + "maximum": 100 + } + ], + "returns": { + "type": "array", + "description": "Array of validator_api_object entries sorted by vote count." + } + }, + { + "method": "get_validators_by_counted_vote", + "description": "Returns validators sorted by counted votes (descending). Starts from a given account name. Only returns validators with counted_votes > 0. 
Maximum 100 results.", + "aliases": ["get_witnesses_by_counted_vote"], + "params": [ + { + "name": "from", + "caption": "From Account", + "description": "The account name to start from. Use empty string to start from the top.", + "type": "string", + "required": true + }, + { + "name": "limit", + "caption": "Limit", + "description": "Maximum number of results to return. Must not exceed 100.", + "type": "integer", + "required": true, + "maximum": 100 + } + ], + "returns": { + "type": "array", + "description": "Array of validator_api_object entries sorted by counted vote." + } + }, + { + "method": "get_validator_count", + "description": "Returns the total number of registered validators on the blockchain.", + "aliases": ["get_witness_count"], + "params": [], + "returns": { + "type": "integer", + "description": "Total count of registered validators." + } + }, + { + "method": "lookup_validator_accounts", + "description": "Looks up validator account names starting from a lower bound. Returns up to 1000 results alphabetically.", + "aliases": ["lookup_witness_accounts"], + "params": [ + { + "name": "lower_bound_name", + "caption": "Lower Bound Name", + "description": "The lower bound of the first account name to return. Use empty string to start from the beginning.", + "type": "string", + "required": true + }, + { + "name": "limit", + "caption": "Limit", + "description": "Maximum number of results to return. Must not exceed 1000.", + "type": "integer", + "required": true, + "maximum": 1000 + } + ], + "returns": { + "type": "array", + "items": { "type": "string" }, + "description": "Set of validator account names matching the query." + } + } + ] + }, + { + "name": "account_history", + "description": "Tracks operations by account and provides per-account operation history queries.", + "methods": [ + { + "method": "get_account_history", + "description": "Returns a map of operations for a given account in the sequence range [from-limit, from]. 
Each account operation has a sequence number starting from 0. Use from=-1 (4294967295) to get the most recent operations.", + "params": [ + { + "name": "account", + "caption": "Account Name", + "description": "The account name whose operation history to retrieve.", + "type": "string", + "required": true + }, + { + "name": "from", + "caption": "From Sequence", + "description": "The absolute sequence number. Use -1 (4294967295) for the most recent operation.", + "type": "integer", + "required": true + }, + { + "name": "limit", + "caption": "Limit", + "description": "Maximum number of operations to return. Must be between 1 and 1000. Must be less than 'from' unless from is -1.", + "type": "integer", + "required": true, + "minimum": 1, + "maximum": 1000 + } + ], + "returns": { + "type": "object", + "description": "Map of sequence number to applied_operation objects for the account." + } + } + ] + }, + { + "name": "operation_history", + "description": "Tracks all blockchain operations and provides block-level and transaction-level operation queries.", + "methods": [ + { + "method": "get_ops_in_block", + "description": "Returns the sequence of operations included or generated within a particular block. Virtual operations are generated by the blockchain (e.g. rewards) as opposed to user-submitted operations.", + "params": [ + { + "name": "block_num", + "caption": "Block Number", + "description": "Height of the block whose operations should be returned.", + "type": "integer", + "required": true + }, + { + "name": "only_virtual", + "caption": "Only Virtual", + "description": "Whether to only include virtual operations in the returned results.", + "type": "boolean", + "required": true + } + ], + "returns": { + "type": "array", + "description": "Array of applied_operation objects from the specified block." 
+ } + }, + { + "method": "get_transaction", + "description": "Returns a transaction by its ID, including block number and transaction index within the block.", + "params": [ + { + "name": "id", + "caption": "Transaction ID", + "description": "The ID of the transaction to retrieve, as a 40-character hex string (a 160-bit, ripemd160-sized hash derived from the transaction's SHA-256 digest).", + "type": "string", + "required": true + } + ], + "returns": { + "type": "object", + "description": "annotated_signed_transaction with block_num and transaction_num fields added." + } + } + ] + }, + { + "name": "database_api", + "description": "The core read-only API for the blockchain database. Provides access to blocks, accounts, chain properties, authority validation, vesting delegations, and more.", + "methods": [ + { + "method": "get_block_header", + "description": "Retrieves a block header by block number.", + "params": [ + { + "name": "block_num", + "caption": "Block Number", + "description": "Height of the block whose header should be returned.", + "type": "integer", + "required": true + } + ], + "returns": { + "type": "object", + "description": "The block header, or null if no matching block was found.", + "nullable": true + } + }, + { + "method": "get_block", + "description": "Retrieves a full, signed block by block number.", + "params": [ + { + "name": "block_num", + "caption": "Block Number", + "description": "Height of the block to be returned.", + "type": "integer", + "required": true + } + ], + "returns": { + "type": "object", + "description": "The full signed block, or null if no matching block was found.", + "nullable": true + } + }, + { + "method": "get_irreversible_block_header", + "description": "Retrieves a block header only if the block is irreversible. 
Returns null if the block has not yet been finalized.", + "params": [ + { + "name": "block_num", + "caption": "Block Number", + "description": "Height of the block whose header should be returned.", + "type": "integer", + "required": true + } + ], + "returns": { + "type": "object", + "description": "The block header if the block is irreversible, or null.", + "nullable": true + } + }, + { + "method": "get_irreversible_block", + "description": "Retrieves a full, signed block only if it is irreversible. Returns null if the block has not yet been finalized.", + "params": [ + { + "name": "block_num", + "caption": "Block Number", + "description": "Height of the block to be returned.", + "type": "integer", + "required": true + } + ], + "returns": { + "type": "object", + "description": "The full signed block if irreversible, or null.", + "nullable": true + } + }, + { + "method": "set_block_applied_callback", + "description": "Sets a callback function that is triggered each time a new block is applied. Used for real-time block notifications via WebSocket.", + "params": [ + { + "name": "callback", + "caption": "Callback", + "description": "Callback function to invoke when a new block is applied.", + "type": "function", + "required": true + } + ], + "returns": { + "type": "null", + "description": "No return value (callback-based)." + } + }, + { + "method": "get_config", + "description": "Retrieves compile-time constants and configuration values of the blockchain (e.g., chain ID, symbol, precision).", + "params": [], + "returns": { + "type": "object", + "description": "Object containing blockchain compile-time configuration constants." 
+ } + }, + { + "method": "get_dynamic_global_properties", + "description": "Retrieves the current dynamic global properties object, which contains real-time chain state such as head block number, total supply, and other dynamic metrics.", + "params": [], + "returns": { + "type": "object", + "description": "The dynamic_global_property_api_object with current chain state." + } + }, + { + "method": "get_chain_properties", + "description": "Retrieves the chain properties as set by the median validator schedule (chain-wide constraints like account creation fee, maximum block size, etc.).", + "params": [], + "returns": { + "type": "object", + "description": "chain_api_properties object with median chain parameters." + } + }, + { + "method": "get_hardfork_version", + "description": "Returns the current hardfork version of the blockchain.", + "params": [], + "returns": { + "type": "string", + "description": "The current hardfork version string (e.g. '0.23.0')." + } + }, + { + "method": "get_next_scheduled_hardfork", + "description": "Returns the next scheduled hardfork version and the time it is planned to go live.", + "params": [], + "returns": { + "type": "object", + "description": "Object with hf_version (string) and live_time (ISO timestamp)." + } + }, + { + "method": "get_accounts", + "description": "Returns full account objects for a list of account names. Includes balances, vesting, authority, and validator votes.", + "params": [ + { + "name": "names", + "caption": "Account Names", + "description": "Array of account names to look up.", + "type": "array", + "items": { "type": "string" }, + "required": true + } + ], + "returns": { + "type": "array", + "description": "Array of account_api_object entries. Only accounts that exist are returned." + } + }, + { + "method": "lookup_account_names", + "description": "Looks up accounts by their names. 
Returns an optional account object for each name; null if the account does not exist.", + "params": [ + { + "name": "account_names", + "caption": "Account Names", + "description": "Array of account names to look up.", + "type": "array", + "items": { "type": "string" }, + "required": true + } + ], + "returns": { + "type": "array", + "description": "Array of optional account_api_object entries. Each element may be null." + } + }, + { + "method": "lookup_accounts", + "description": "Looks up account names starting from a lower bound. Returns a set of account names in alphabetical order.", + "params": [ + { + "name": "lower_bound_name", + "caption": "Lower Bound Name", + "description": "The lower bound of the first account name to return.", + "type": "string", + "required": true + }, + { + "name": "limit", + "caption": "Limit", + "description": "Maximum number of results to return. Must not exceed 1000.", + "type": "integer", + "required": true, + "maximum": 1000 + } + ], + "returns": { + "type": "array", + "items": { "type": "string" }, + "description": "Set of account names matching the query." + } + }, + { + "method": "get_account_count", + "description": "Returns the total number of accounts registered on the blockchain.", + "params": [], + "returns": { + "type": "integer", + "description": "Total number of registered accounts." + } + }, + { + "method": "get_master_history", + "description": "Returns the master authority change history for a given account, useful for account recovery audits.", + "params": [ + { + "name": "account", + "caption": "Account Name", + "description": "The account name whose master authority history to retrieve.", + "type": "string", + "required": true + } + ], + "returns": { + "type": "array", + "description": "Array of master_authority_history_api_object entries." 
+ } + }, + { + "method": "get_recovery_request", + "description": "Returns the current account recovery request for an account, if one exists.", + "params": [ + { + "name": "account", + "caption": "Account Name", + "description": "The account name whose recovery request to check.", + "type": "string", + "required": true + } + ], + "returns": { + "type": "object", + "description": "The account_recovery_request_api_object, or null if no request exists.", + "nullable": true + } + }, + { + "method": "get_escrow", + "description": "Returns the escrow object for a given sender and escrow ID.", + "params": [ + { + "name": "from", + "caption": "From Account", + "description": "The account name of the escrow sender.", + "type": "string", + "required": true + }, + { + "name": "escrow_id", + "caption": "Escrow ID", + "description": "The numeric escrow ID to look up.", + "type": "integer", + "required": true + } + ], + "returns": { + "type": "object", + "description": "The escrow_api_object, or null if not found.", + "nullable": true + } + }, + { + "method": "get_withdraw_routes", + "description": "Returns vesting withdrawal routes for a given account. Can filter by direction (incoming, outgoing, or all).", + "params": [ + { + "name": "account", + "caption": "Account Name", + "description": "The account name whose withdrawal routes to retrieve.", + "type": "string", + "required": true + }, + { + "name": "type", + "caption": "Route Type", + "description": "Filter direction: 'incoming', 'outgoing', or 'all'.", + "type": "string", + "enum": ["incoming", "outgoing", "all"], + "required": true + } + ], + "returns": { + "type": "array", + "description": "Array of withdraw_route objects with from_account, to_account, percent, auto_vest." + } + }, + { + "method": "get_vesting_delegations", + "description": "Returns vesting delegation objects for a given account. 
Supports pagination and filtering by delegated or received.", + "params": [ + { + "name": "account", + "caption": "Account Name", + "description": "The delegator or delegatee account name.", + "type": "string", + "required": true + }, + { + "name": "from", + "caption": "From", + "description": "The account name to start from for pagination.", + "type": "string", + "required": true + }, + { + "name": "limit", + "caption": "Limit", + "description": "Maximum number of results. Defaults to 100. Must not exceed 1000.", + "type": "integer", + "required": false, + "default": 100, + "maximum": 1000 + }, + { + "name": "type", + "caption": "Delegation Type", + "description": "Filter type: 'delegated' (sent) or 'received'. Defaults to 'delegated'.", + "type": "string", + "enum": ["delegated", "received"], + "required": false, + "default": "delegated" + } + ], + "returns": { + "type": "array", + "description": "Array of vesting_delegation_api_object entries." + } + }, + { + "method": "get_expiring_vesting_delegations", + "description": "Returns expiring vesting delegation objects for a given account, starting from a given date.", + "params": [ + { + "name": "account", + "caption": "Account Name", + "description": "The delegator account name.", + "type": "string", + "required": true + }, + { + "name": "from", + "caption": "From Date", + "description": "Start date/time for expiration lookup (ISO timestamp).", + "type": "string", + "required": true + }, + { + "name": "limit", + "caption": "Limit", + "description": "Maximum number of results. Defaults to 100. Must not exceed 1000.", + "type": "integer", + "required": false, + "default": 100, + "maximum": 1000 + } + ], + "returns": { + "type": "array", + "description": "Array of vesting_delegation_expiration_api_object entries." 
+ } + }, + { + "method": "get_transaction_hex", + "description": "Returns a hexadecimal dump of the serialized binary form of a transaction.", + "params": [ + { + "name": "trx", + "caption": "Transaction", + "description": "The signed transaction object to serialize.", + "type": "object", + "required": true + } + ], + "returns": { + "type": "string", + "description": "Hex-encoded serialized transaction." + } + }, + { + "method": "get_required_signatures", + "description": "Given a partially signed transaction and a set of available public keys, returns the minimal subset of public keys that should add signatures to authorize the transaction.", + "params": [ + { + "name": "trx", + "caption": "Transaction", + "description": "The signed transaction to analyze.", + "type": "object", + "required": true + }, + { + "name": "available_keys", + "caption": "Available Keys", + "description": "Array/set of public keys that the caller can sign with.", + "type": "array", + "items": { "type": "string" }, + "required": true + } + ], + "returns": { + "type": "array", + "items": { "type": "string" }, + "description": "Set of public keys that are required to sign the transaction." + } + }, + { + "method": "get_potential_signatures", + "description": "Returns the set of all public keys that could possibly sign for a given transaction. Useful for wallets to filter their key set before calling get_required_signatures.", + "params": [ + { + "name": "trx", + "caption": "Transaction", + "description": "The signed transaction to analyze.", + "type": "object", + "required": true + } + ], + "returns": { + "type": "array", + "items": { "type": "string" }, + "description": "Set of all public keys that could potentially authorize the transaction." + } + }, + { + "method": "verify_authority", + "description": "Verifies that a transaction has all of the required signatures. 
Returns true if valid, otherwise throws an exception.", + "params": [ + { + "name": "trx", + "caption": "Transaction", + "description": "The signed transaction to verify.", + "type": "object", + "required": true + } + ], + "returns": { + "type": "boolean", + "description": "true if the transaction has all required signatures." + } + }, + { + "method": "verify_account_authority", + "description": "Verifies that a set of public keys has sufficient authority to authorize actions on behalf of an account.", + "params": [ + { + "name": "name_or_id", + "caption": "Account Name", + "description": "The account name to check authority for.", + "type": "string", + "required": true + }, + { + "name": "signers", + "caption": "Signer Keys", + "description": "Array/set of public keys to verify against the account's authority.", + "type": "array", + "items": { "type": "string" }, + "required": true + } + ], + "returns": { + "type": "boolean", + "description": "true if the signers have enough authority to authorize the account." + } + }, + { + "method": "get_database_info", + "description": "Returns database shared memory usage information including total size, free size, reserved size, used size, and per-index record counts.", + "params": [], + "returns": { + "type": "object", + "description": "Object with total_size, free_size, reserved_size, used_size, and index_list (array of {name, record_count})." 
+ } + }, + { + "method": "get_proposed_transactions", + "description": "Returns proposed transactions (proposals) associated with a given account, both authored and requiring approval.", + "params": [ + { + "name": "account", + "caption": "Account Name", + "description": "The account name whose proposals to retrieve.", + "type": "string", + "required": true + }, + { + "name": "from", + "caption": "From Offset", + "description": "Offset for pagination (number of results to skip).", + "type": "integer", + "required": true + }, + { + "name": "limit", + "caption": "Limit", + "description": "Maximum number of proposals to return. Must not exceed 100.", + "type": "integer", + "required": true, + "maximum": 100 + } + ], + "returns": { + "type": "array", + "description": "Array of proposal_api_object entries." + } + }, + { + "method": "get_accounts_on_sale", + "description": "Returns a list of accounts currently on sale (direct sale, not auction). Only accounts whose sale start time has passed are included.", + "params": [ + { + "name": "from", + "caption": "From Offset", + "description": "Number of results to skip for pagination.", + "type": "integer", + "required": true + }, + { + "name": "limit", + "caption": "Limit", + "description": "Maximum number of results to return. Must not exceed 1000.", + "type": "integer", + "required": true, + "maximum": 1000 + } + ], + "returns": { + "type": "array", + "description": "Array of account_on_sale_api_object entries." + } + }, + { + "method": "get_accounts_on_auction", + "description": "Returns a list of accounts currently on auction (no target buyer set). Only accounts whose sale start time has passed are included.", + "params": [ + { + "name": "from", + "caption": "From Offset", + "description": "Number of results to skip for pagination.", + "type": "integer", + "required": true + }, + { + "name": "limit", + "caption": "Limit", + "description": "Maximum number of results to return. 
Must not exceed 1000.", + "type": "integer", + "required": true, + "maximum": 1000 + } + ], + "returns": { + "type": "array", + "description": "Array of account_on_sale_api_object entries for auction listings." + } + }, + { + "method": "get_subaccounts_on_sale", + "description": "Returns a list of subaccounts currently on sale.", + "params": [ + { + "name": "from", + "caption": "From Offset", + "description": "Number of results to skip for pagination.", + "type": "integer", + "required": true + }, + { + "name": "limit", + "caption": "Limit", + "description": "Maximum number of results to return. Must not exceed 1000.", + "type": "integer", + "required": true, + "maximum": 1000 + } + ], + "returns": { + "type": "array", + "description": "Array of subaccount_on_sale_api_object entries." + } + } + ] + }, + { + "name": "account_by_key", + "description": "Provides a lookup from public keys to the accounts that reference those keys in their authority.", + "methods": [ + { + "method": "get_key_references", + "description": "Returns all account names that reference the given public keys in their master, active, or regular authority.", + "params": [ + { + "name": "keys", + "caption": "Public Keys", + "description": "Array of public keys to look up.", + "type": "array", + "items": { "type": "string" }, + "required": true + } + ], + "returns": { + "type": "array", + "description": "Array of arrays of account names. Each inner array corresponds to one input key and contains all accounts referencing that key." + } + } + ] + }, + { + "name": "network_broadcast_api", + "description": "Provides transaction and block broadcasting capabilities. This is the write API for submitting transactions to the network.", + "methods": [ + { + "method": "broadcast_transaction", + "description": "Broadcasts a signed transaction to the network. The transaction is accepted into the pending pool and propagated to P2P peers. 
Optionally checks that the blockchain is not too far behind.", + "params": [ + { + "name": "trx", + "caption": "Transaction", + "description": "The signed transaction to broadcast.", + "type": "object", + "required": true + }, + { + "name": "max_block_age", + "caption": "Max Block Age", + "description": "Optional. Maximum allowed age of the head block in seconds. If the blockchain is behind by more than this, the call will fail. Use -1 to disable.", + "type": "integer", + "required": false + } + ], + "returns": { + "type": "null", + "description": "No return value on success." + } + }, + { + "method": "broadcast_transaction_synchronous", + "description": "Broadcasts a signed transaction and waits for confirmation. Returns the transaction ID, block number, and transaction index once included in a block. The result also indicates whether the transaction expired.", + "params": [ + { + "name": "trx", + "caption": "Transaction", + "description": "The signed transaction to broadcast.", + "type": "object", + "required": true + }, + { + "name": "max_block_age", + "caption": "Max Block Age", + "description": "Optional. Maximum allowed age of the head block in seconds. Use -1 to disable.", + "type": "integer", + "required": false + } + ], + "returns": { + "type": "object", + "description": "Object with id (transaction hash), block_num, trx_num, and expired fields." + } + }, + { + "method": "broadcast_block", + "description": "Broadcasts a signed block to the network. Typically used by validators to propagate newly produced blocks.", + "params": [ + { + "name": "block", + "caption": "Block", + "description": "The signed block to broadcast.", + "type": "object", + "required": true + } + ], + "returns": { + "type": "null", + "description": "No return value on success." + } + }, + { + "method": "broadcast_transaction_with_callback", + "description": "Broadcasts a signed transaction with a confirmation callback. The first argument is the callback, followed by the transaction. 
Similar to broadcast_transaction_synchronous but with custom callback handling.", + "params": [ + { + "name": "callback", + "caption": "Callback", + "description": "Confirmation callback function.", + "type": "function", + "required": true + }, + { + "name": "trx", + "caption": "Transaction", + "description": "The signed transaction to broadcast.", + "type": "object", + "required": true + }, + { + "name": "max_block_age", + "caption": "Max Block Age", + "description": "Optional. Maximum allowed age of the head block in seconds. Use -1 to disable.", + "type": "integer", + "required": false + } + ], + "returns": { + "type": "null", + "description": "No direct return; result delivered via callback." + } + } + ] + }, + { + "name": "committee_api", + "description": "Provides access to committee worker proposal requests and their voting state.", + "methods": [ + { + "method": "get_committee_request", + "description": "Returns a committee request by its ID, optionally including votes.", + "params": [ + { + "name": "request_id", + "caption": "Request ID", + "description": "The numeric ID of the committee request to retrieve.", + "type": "integer", + "required": true + }, + { + "name": "votes_count", + "caption": "Votes Count", + "description": "Number of votes to include. Use 0 for no votes, -1 for all votes, or a positive number to limit.", + "type": "integer", + "required": false, + "default": 0 + } + ], + "returns": { + "type": "object", + "description": "committee_api_object with optional embedded votes array." + } + }, + { + "method": "get_committee_request_votes", + "description": "Returns all votes for a specific committee request.", + "params": [ + { + "name": "request_id", + "caption": "Request ID", + "description": "The numeric ID of the committee request whose votes to retrieve.", + "type": "integer", + "required": true + } + ], + "returns": { + "type": "array", + "description": "Array of committee_vote_state objects." 
+ } + }, + { + "method": "get_committee_requests_list", + "description": "Returns a list of committee request IDs filtered by status.", + "params": [ + { + "name": "status", + "caption": "Status", + "description": "The status code to filter by (e.g. 0=pending, 1=approved, etc.).", + "type": "integer", + "required": true + } + ], + "returns": { + "type": "array", + "items": { "type": "integer" }, + "description": "Array of committee request IDs matching the given status." + } + } + ] + }, + { + "name": "invite_api", + "description": "Provides access to invite objects used for account registration via invite keys.", + "methods": [ + { + "method": "get_invites_list", + "description": "Returns a list of invite IDs filtered by status.", + "params": [ + { + "name": "status", + "caption": "Status", + "description": "The status code to filter invites by.", + "type": "integer", + "required": true + } + ], + "returns": { + "type": "array", + "items": { "type": "integer" }, + "description": "Array of invite database IDs matching the given status." + } + }, + { + "method": "get_invite_by_id", + "description": "Returns an invite object by its database ID.", + "params": [ + { + "name": "id", + "caption": "Invite ID", + "description": "The database ID of the invite to retrieve.", + "type": "integer", + "required": true + } + ], + "returns": { + "type": "object", + "description": "invite_api_object with invite details (key, creator, balance, etc.)." + } + }, + { + "method": "get_invite_by_key", + "description": "Returns an invite object by its public key.", + "params": [ + { + "name": "key", + "caption": "Invite Key", + "description": "The public key associated with the invite.", + "type": "string", + "required": true + } + ], + "returns": { + "type": "object", + "description": "invite_api_object matching the given key." 
+ } + } + ] + }, + { + "name": "paid_subscription_api", + "description": "Provides access to paid subscription data: subscription options set by content creators, subscription status of subscribers, and active/inactive subscription lists.", + "methods": [ + { + "method": "get_paid_subscription_options", + "description": "Returns the paid subscription settings for a given account (creator).", + "params": [ + { + "name": "account", + "caption": "Account Name", + "description": "The account name of the subscription creator.", + "type": "string", + "required": true + } + ], + "returns": { + "type": "object", + "description": "paid_subscription_state with subscription details (price, period, etc.)." + } + }, + { + "method": "get_paid_subscriptions", + "description": "Returns a paginated list of all paid subscription objects.", + "params": [ + { + "name": "from", + "caption": "From Offset", + "description": "Number of results to skip for pagination.", + "type": "integer", + "required": true + }, + { + "name": "limit", + "caption": "Limit", + "description": "Maximum number of results to return. Must not exceed 1000.", + "type": "integer", + "required": true, + "maximum": 1000 + } + ], + "returns": { + "type": "array", + "description": "Array of paid_subscription_object entries." + } + }, + { + "method": "get_paid_subscription_status", + "description": "Returns the subscription status of a specific subscriber for a given creator account.", + "params": [ + { + "name": "subscriber", + "caption": "Subscriber", + "description": "The account name of the subscriber.", + "type": "string", + "required": true + }, + { + "name": "account", + "caption": "Creator Account", + "description": "The account name of the subscription creator.", + "type": "string", + "required": true + } + ], + "returns": { + "type": "object", + "description": "paid_subscribe_state with subscription status details." 
+ } + }, + { + "method": "get_active_paid_subscriptions", + "description": "Returns a list of creator account names that a given subscriber has active subscriptions to.", + "params": [ + { + "name": "subscriber", + "caption": "Subscriber", + "description": "The account name of the subscriber.", + "type": "string", + "required": true + } + ], + "returns": { + "type": "array", + "items": { "type": "string" }, + "description": "Array of creator account names with active subscriptions." + } + }, + { + "method": "get_inactive_paid_subscriptions", + "description": "Returns a list of creator account names that a given subscriber has inactive (expired) subscriptions to.", + "params": [ + { + "name": "subscriber", + "caption": "Subscriber", + "description": "The account name of the subscriber.", + "type": "string", + "required": true + } + ], + "returns": { + "type": "array", + "items": { "type": "string" }, + "description": "Array of creator account names with inactive subscriptions." + } + } + ] + }, + { + "name": "custom_protocol_api", + "description": "Provides access to account data enriched with custom protocol sequence information. Custom protocols allow third-party applications to track per-account custom operations.", + "methods": [ + { + "method": "get_account", + "description": "Returns an account object enriched with custom protocol sequence data for a specific custom protocol ID. Populates custom_sequence and custom_sequence_block_num fields.", + "params": [ + { + "name": "account", + "caption": "Account Name", + "description": "The account name to look up.", + "type": "string", + "required": true + }, + { + "name": "custom_protocol_id", + "caption": "Custom Protocol ID", + "description": "The custom protocol ID string to retrieve the sequence for. 
Use empty string to skip custom protocol lookup.", + "type": "string", + "required": true + } + ], + "returns": { + "type": "object", + "description": "account_api_object with custom_sequence and custom_sequence_block_num populated for the given protocol." + } + } + ] + }, + { + "name": "auth_util", + "description": "Provides utility methods for verifying account authority signatures against arbitrary data digests.", + "methods": [ + { + "method": "check_authority_signature", + "description": "Verifies that the provided signatures are valid for the given account's authority at a specified level (master, active, or regular). Returns the public keys derived from the signatures.", + "params": [ + { + "name": "account_name", + "caption": "Account Name", + "description": "The account name whose authority to check.", + "type": "string", + "required": true + }, + { + "name": "level", + "caption": "Authority Level", + "description": "The authority level to verify against: 'master' (or 'm'), 'active' (or 'a'), 'regular' (or 'r'). Empty string defaults to 'active'.", + "type": "string", + "required": true + }, + { + "name": "dig", + "caption": "Digest", + "description": "The SHA-256 hash of the data that was signed.", + "type": "string", + "required": true + }, + { + "name": "sigs", + "caption": "Signatures", + "description": "Array of signatures to verify.", + "type": "array", + "items": { "type": "string" }, + "required": true + } + ], + "returns": { + "type": "array", + "items": { "type": "string" }, + "description": "Array of public keys recovered from the valid signatures." 
+ } + } + ] + }, + { + "name": "block_info", + "description": "Tracks block metadata (size, average block size, slot info) and provides queries to retrieve this information for ranges of blocks.", + "methods": [ + { + "method": "get_block_info", + "description": "Returns block metadata (block_id, block_size, average_block_size, aslot, last_irreversible_block_num) for a range of blocks starting from start_block_num.", + "params": [ + { + "name": "start_block_num", + "caption": "Start Block Number", + "description": "The first block number to return info for. Must be greater than 0.", + "type": "integer", + "required": true, + "minimum": 1 + }, + { + "name": "count", + "caption": "Count", + "description": "Number of blocks to return info for. Must not exceed 10000.", + "type": "integer", + "required": true, + "maximum": 10000 + } + ], + "returns": { + "type": "array", + "description": "Array of block_info objects. Entries may be empty if no info is stored (e.g. blocks before snapshot)." + } + }, + { + "method": "get_blocks_with_info", + "description": "Returns full signed blocks with attached metadata for a range. Limits total response size to 8 MB. Stops early if no info is stored for a block.", + "params": [ + { + "name": "start_block_num", + "caption": "Start Block Number", + "description": "The first block number to return. Must be greater than 0.", + "type": "integer", + "required": true, + "minimum": 1 + }, + { + "name": "count", + "caption": "Count", + "description": "Maximum number of blocks to return. Must not exceed 10000. Response is capped at 8 MB total.", + "type": "integer", + "required": true, + "maximum": 10000 + } + ], + "returns": { + "type": "array", + "description": "Array of block_with_info objects, each containing a signed block and its block_info metadata." 
+ } + } + ] + }, + { + "name": "raw_block", + "description": "Provides access to raw (base64-encoded) serialized block data for low-level block inspection or re-import.", + "methods": [ + { + "method": "get_raw_block", + "description": "Returns a raw block by block number, including the base64-encoded serialized binary, block ID, previous block ID, and timestamp.", + "params": [ + { + "name": "block_num", + "caption": "Block Number", + "description": "Height of the block to retrieve in raw form.", + "type": "integer", + "required": true + } + ], + "returns": { + "type": "object", + "description": "Object with block_id, previous, timestamp, and raw_block (base64-encoded string) fields." + } + } + ] + } + ] +} diff --git a/@l10n/ru/docs/development/building.md b/@l10n/ru/docs/development/building.md index 7ba29470f0..cdd27517d6 100644 --- a/@l10n/ru/docs/development/building.md +++ b/@l10n/ru/docs/development/building.md @@ -26,10 +26,8 @@ chmod +x build-linux.sh ```bash ./build-linux.sh # Release-сборка (по умолчанию) -./build-linux.sh -l # LOW_MEMORY_NODE (узлы-валидаторы) ./build-linux.sh -n # Testnet-сборка ./build-linux.sh -t Debug -j4 # Debug-сборка с 4 параллельными задачами -./build-linux.sh --skip-deps # Пропустить установку зависимостей ./build-linux.sh --install # Установить в систему после сборки # Пользовательские пути к зависимостям @@ -54,7 +52,6 @@ chmod +x build-mac.sh **Параметры:** ```bash -./build-mac.sh -l # Low-memory узел ./build-mac.sh -n # Testnet ./build-mac.sh --skip-deps # Пропустить установки Homebrew ./build-mac.sh --boost-root /opt/boost_1_74_0 @@ -77,7 +74,6 @@ build-mingw.bat | Переменная | По умолчанию | Описание | |-----------|-------------|---------| | `VIZ_BUILD_TYPE` | Release | Release или Debug | -| `VIZ_LOW_MEMORY` | OFF | Включить low-memory узел | | `VIZ_BUILD_TESTNET` | OFF | Testnet-сборка | | `VIZ_FULL_STATIC` | OFF | Полностью статический бинарник | | `VIZ_CMAKE_EXTRA` | — | Дополнительные флаги CMake | @@ -100,7 +96,6 
@@ build-msvc.bat |-----------|-------------|---------| | `VIZ_VS_VERSION` | "Visual Studio 17 2022" | Генератор Visual Studio | | `VIZ_BUILD_TYPE` | Release | Тип сборки | -| `VIZ_LOW_MEMORY` | OFF | Low-memory узел | | `VIZ_BUILD_TESTNET` | OFF | Testnet-сборка | **Требования:** Visual Studio 2019+ с нагрузкой "Desktop development with C++", CMake 3.16+. @@ -125,7 +120,6 @@ build-msvc.bat | Параметр | По умолчанию | Описание | |---------|-------------|---------| | `BUILD_TESTNET` | OFF | Сборка для testnet | -| `LOW_MEMORY_NODE` | OFF | Исключить неконсенсусные данные (уменьшает RAM) | | `CHAINBASE_CHECK_LOCKING` | OFF | Включить проверку блокировок (только для разработки) | | `BUILD_SHARED_LIBRARIES` | OFF | Собирать разделяемые библиотеки | | `USE_PCH` | OFF | Включить предкомпилированные заголовки (ускоряет пересборку) | @@ -140,9 +134,6 @@ build-msvc.bat # Release-сборка python3 programs/build_helpers/configure_build.py --release --src ../.. -# Debug с low-memory -python3 programs/build_helpers/configure_build.py --debug --low-memory - # Кросс-компиляция для Windows с MinGW python3 programs/build_helpers/configure_build.py --win --release diff --git a/@l10n/ru/docs/node/building.md b/@l10n/ru/docs/node/building.md index 60f278784d..3c70eceb8a 100644 --- a/@l10n/ru/docs/node/building.md +++ b/@l10n/ru/docs/node/building.md @@ -46,9 +46,6 @@ chmod +x build-linux.sh ### Основные флаги сборки ```bash -# Низкопамятный узел (для валидаторов/сид-узлов — без плагинов индексирования истории) -./build-linux.sh -l - # Сборка для тестнета ./build-linux.sh -n @@ -58,9 +55,6 @@ chmod +x build-linux.sh # Параллельные задания ./build-linux.sh -j 8 -# Пропустить установку зависимостей (уже установлены) -./build-linux.sh --skip-deps - # Пользовательские пути к Boost / OpenSSL ./build-linux.sh --boost-root /opt/boost_1_74_0 --openssl-root /opt/openssl ``` @@ -99,7 +93,6 @@ build-mingw.bat | Переменная | По умолчанию | Описание | |------------|--------------|----------| | 
`VIZ_BUILD_TYPE` | `Release` | `Release` или `Debug` | -| `VIZ_LOW_MEMORY` | `OFF` | `ON` для низкопамятного узла | | `VIZ_BUILD_TESTNET` | `OFF` | `ON` для сборки тестнета | | `VIZ_FULL_STATIC` | `OFF` | `ON` для полностью статического бинарного файла | @@ -124,7 +117,6 @@ build-msvc.bat | Опция | По умолчанию | Описание | |-------|--------------|----------| | `BUILD_TESTNET` | `OFF` | Включить код для тестнета | -| `LOW_MEMORY_NODE` | `OFF` | Исключить плагины истории/индексирования | | `CHAINBASE_CHECK_LOCKING` | `OFF` | Включить проверки блокировок (debug) | | `BUILD_SHARED_LIBRARIES` | `OFF` | Собрать разделяемые библиотеки | | `USE_PCH` | `OFF` | Включить предкомпилированные заголовки (ускоряет пересборку) | @@ -134,7 +126,6 @@ build-msvc.bat ```bash mkdir build && cd build cmake -DCMAKE_BUILD_TYPE=Release \ - -DLOW_MEMORY_NODE=ON \ -DCMAKE_INSTALL_PREFIX=/usr/local \ .. make -j$(nproc) diff --git a/@l10n/ru/docs/node/docker.md b/@l10n/ru/docs/node/docker.md index 94ef93829d..48cb630638 100644 --- a/@l10n/ru/docs/node/docker.md +++ b/@l10n/ru/docs/node/docker.md @@ -129,10 +129,10 @@ docker build \ ### CMake-флаги для каждого образа -| Образ | `LOW_MEMORY_NODE` | `BUILD_TESTNET` | -|-------|:-----------------:|:---------------:| -| production | OFF | OFF | -| testnet | OFF | ON | +| Образ | `BUILD_TESTNET` | +|-------|:---------------:| +| production | OFF | +| testnet | ON | --- @@ -167,6 +167,59 @@ shared-file-size = 4G --- +## Ротация логов + +vizd пишет весь вывод в stdout/stderr. Дефолтный драйвер `json-file` в Docker **не имеет ограничений по размеру** — цикл краша или буря ошибок может заполнить диск хоста за считанные минуты (в продакшне наблюдалось 35 ГБ+). + +Вместо этого используйте драйвер `local`. Он хранит логи в компактном бинарном формате и автоматически ротирует файлы. 
+ +**Глобальная конфигурация (рекомендуется — защищает все контейнеры на хосте):** + +Файл `/etc/docker/daemon.json`: +```json +{ + "log-driver": "local", + "log-opts": { + "max-size": "100m", + "max-file": "5" + } +} +``` + +Применить: + +```bash +sudo systemctl restart docker +``` + +**Для конкретного контейнера (`docker run`):** + +```bash +docker run -d \ + --log-driver=local \ + --log-opt max-size=100m \ + --log-opt max-file=5 \ + --name vizd \ + vizblockchain/vizd:latest +``` + +**Для конкретного контейнера (docker-compose):** + +```yaml +services: + vizd: + image: vizblockchain/vizd:latest + logging: + driver: local + options: + max-size: "100m" + max-file: "5" +``` + +> При `max-file: 5` и `max-size: 100m` Docker хранит не более 500 МБ логов на контейнер и автоматически удаляет старейший файл при ротации. + +--- + ## Устранение неполадок | Симптом | Причина | Решение | @@ -176,3 +229,4 @@ shared-file-size = 4G | Нет пиров | Файрвол блокирует порт 2001 | Откройте порт 2001 TCP входящий | | Медленная синхронизация | Снимок не загружен | Предоставьте снимок в томе перед первым запуском | | `Permission denied` на `/var/lib/vizd` | Несоответствие владельца тома | `chown -R 1000:1000 /data/vizd` | +| Диск заполняется логами Docker | Драйвер `json-file` не имеет ограничения по размеру | Настройте драйвер `local` с `max-size`/`max-file` — см. [Ротация логов](#ротация-логов) | diff --git a/@l10n/ru/docs/node/getting-started.md b/@l10n/ru/docs/node/getting-started.md index 538b83ee05..2540d782c6 100644 --- a/@l10n/ru/docs/node/getting-started.md +++ b/@l10n/ru/docs/node/getting-started.md @@ -133,7 +133,7 @@ shared-file-size = 4G # Плагины (полный узел) plugin = chain p2p webserver json_rpc database_api network_broadcast_api -plugin = social_network tags follow account_history +plugin = account_history ``` Для узла-валидатора см. [Узел-валидатор](./validator-node.md). 
@@ -172,7 +172,6 @@ curl -s -X POST http://localhost:8090 \ | Полный узел | `config.ini` | Все плагины, публичные RPC-эндпоинты | | Валидатор | `config_witness.ini` | Производство блоков, RPC только на localhost | | Тестовая сеть | `config_testnet.ini` | Разработка и тестирование | -| Малая память | `config.ini` + флаг сборки `LOW_MEMORY_NODE` | Только консенсус, без индексов истории | --- diff --git a/@l10n/ru/docs/plugins/overview.md b/@l10n/ru/docs/plugins/overview.md index e07d477e00..b829a4b162 100644 --- a/@l10n/ru/docs/plugins/overview.md +++ b/@l10n/ru/docs/plugins/overview.md @@ -222,6 +222,24 @@ DLT P2P-сетевое взаимодействие — распростране - `track-account-range` — диапазон имён аккаунтов для индексирования (по умолчанию: все аккаунты) - `history-count-blocks` — сохранять историю за N блоков +> **Зависимость:** `account_history` **требует** `operation_history` как родительский плагин +> (`APPBASE_PLUGIN_REQUIRES`). Нода не запустится при отсутствии `operation_history`. +> `account_history` хранит ссылки `operation_id_type` (внешние ключи) на строки `operation_object`, +> которыми управляет `operation_history`; при запросе `get_account_history` разрешает их через +> `database.get(itr->op)`. +> +> **Всегда включать оба плагина вместе:** +> ```ini +> plugin = operation_history +> plugin = account_history +> ``` +> +> **Координация очистки:** Оба плагина читают один и тот же ключ `history-count-blocks` из +> `config.ini` — разделения по плагинам нет. Одно значение применяется к обоим одновременно. +> Внутри `account_history` дополнительно вызывает `operation_history::get_min_keep_block()` +> при каждом блоке как защитную проверку, гарантируя, что его записи никогда не будут ссылаться +> на уже удалённый `operation_object`. 
+ --- ### `operation_history` diff --git a/@l10n/ru/docs/plugins/webserver.md b/@l10n/ru/docs/plugins/webserver.md index 3284e98243..4ec83ecd93 100644 --- a/@l10n/ru/docs/plugins/webserver.md +++ b/@l10n/ru/docs/plugins/webserver.md @@ -137,22 +137,6 @@ server { } location / { - # CORS — разрешить любой источник (публичный API) - add_header 'Access-Control-Allow-Origin' '*' always; - add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, PATCH, OPTIONS' always; - add_header 'Access-Control-Allow-Headers' 'DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization' always; - add_header 'Access-Control-Expose-Headers' 'Content-Length,Content-Range' always; - - if ($request_method = 'OPTIONS') { - add_header 'Access-Control-Allow-Origin' '*' always; - add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, PATCH, OPTIONS' always; - add_header 'Access-Control-Allow-Headers' 'DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization' always; - add_header 'Access-Control-Max-Age' 1728000; - add_header 'Content-Type' 'text/plain charset=UTF-8'; - add_header 'Content-Length' 0; - return 204; - } - proxy_pass http://127.0.0.1:8090; proxy_http_version 1.1; diff --git a/@l10n/ru/docs/storage/shared-memory.md b/@l10n/ru/docs/storage/shared-memory.md index dc752c7272..4989cd765f 100644 --- a/@l10n/ru/docs/storage/shared-memory.md +++ b/@l10n/ru/docs/storage/shared-memory.md @@ -92,10 +92,21 @@ skip-virtual-ops = true База данных автоматически увеличивается, когда свободное место падает ниже `min-free-shared-file-size`. При каждом изменении размера: -1. Приостанавливаются все операции (включая производство блоков и API-запросы). -2. Уничтожается текущее отображение памяти. -3. Файл увеличивается на `inc-shared-file-size`. -4. Файл заново отображается, пересчитываются все указатели индексов. +1. Записывается маркер сбоя `resize_in_progress`. +2. 
Все грязные страницы сбрасываются на диск (`flush()`). +3. Приостанавливаются все операции (включая производство блоков и API-запросы). +4. Уничтожается текущее отображение памяти. +5. Файл увеличивается на `inc-shared-file-size`. +6. Файл заново отображается, пересчитываются все указатели индексов. +7. Проверяется, что ключевые объекты (например, `dynamic_global_property_object`) пережили перераспределение. +8. Маркер сбоя удаляется. + +### Механизмы безопасности + +- **Сброс перед изменением размера:** Грязные страницы записываются на диск до уничтожения отображения, что гарантирует согласованность файла на диске в случае сбоя во время увеличения. +- **Маркер сбоя:** Файл `resize_in_progress` записывается перед деструктивным перераспределением и удаляется после успешного завершения. Если процесс аварийно завершается во время изменения размера, маркер сохраняется и запускает автоматическое восстановление при следующем запуске. +- **Проверка после изменения размера:** После перераспределения узел проверяет, что `max_memory()` соответствует ожидаемому размеру и что критические объекты (например, `dynamic_global_property_object`) не повреждены. Повреждение обнаруживается рано, а не приводит к запутанным сбоям позже. +- **Безопасность при bad_alloc:** Если разделяемая память исчерпана во время применения блока, сессия отмены безопасно отбрасывается (вместо попытки обречённой отмены, которая привела бы к краху процесса через `std::terminate`). Отложенное изменение размера планируется на следующий блок. Выделяйте `shared-file-size` с запасом, чтобы минимизировать частоту изменений размера. Каждое изменение вызывает скачок задержки. @@ -121,11 +132,12 @@ skip-virtual-ops = true ``` 1. Открыть shared_memory.bin (увеличить, если shared-file-size больше) 2. Захватить эксклюзивную блокировку файла -3. Инициализировать индексы -4. Если отсутствует genesis → init_genesis() -5. Открыть block_log или dlt_block_log -6. undo_all() → откатиться к последнему необратимому блоку -7. 
Проверить совпадение head блока с block log +3. Проверить маркер сбоя resize_in_progress → запустить восстановление при обнаружении +4. Инициализировать индексы +5. Если отсутствует genesis → init_genesis() +6. Открыть block_log или dlt_block_log +7. undo_all() → откатиться к последнему необратимому блоку +8. Проверить совпадение head блока с block log ``` --- @@ -133,10 +145,12 @@ skip-virtual-ops = true ## Восстановление | Симптом | Действие | -|---------|---------| +|---------|--------| | `CRITICAL: validator X account object MISSING` | Повреждение — использовать `--replay-from-snapshot --snapshot-auto-latest` | | `Could not modify object, uniqueness constraint violated` | Повреждение — использовать `--replay-from-snapshot --snapshot-auto-latest` | | `Unable to acquire READ lock` | Конкуренция за блокировку — увеличить `read-wait-micro` / включить `single-write-thread` | +| `Shared memory corrupted: previous resize() crashed` | Прерванное изменение размера — использовать `--replay-from-snapshot --snapshot-auto-latest` | +| `dynamic_global_property_object missing after resize` | Повреждение после изменения размера — использовать `--replay-from-snapshot --snapshot-auto-latest` | | Узел зацикливается при запуске | Повреждённый файл — `--replay-from-snapshot --snapshot-auto-latest` | Варианты восстановления: diff --git a/@l10n/zh-CN/docs/development/building.md b/@l10n/zh-CN/docs/development/building.md index f7c1cb5125..8eb6279129 100644 --- a/@l10n/zh-CN/docs/development/building.md +++ b/@l10n/zh-CN/docs/development/building.md @@ -26,10 +26,8 @@ chmod +x build-linux.sh ```bash ./build-linux.sh # Release 构建(默认) -./build-linux.sh -l # LOW_MEMORY_NODE(验证者节点) ./build-linux.sh -n # Testnet 构建 ./build-linux.sh -t Debug -j4 # Debug 构建,4 个并行任务 -./build-linux.sh --skip-deps # 跳过依赖安装 ./build-linux.sh --install # 构建后安装到系统 # 自定义依赖路径 @@ -54,7 +52,6 @@ chmod +x build-mac.sh **选项:** ```bash -./build-mac.sh -l # 低内存节点 ./build-mac.sh -n # Testnet ./build-mac.sh --skip-deps # 跳过 
Homebrew 安装 ./build-mac.sh --boost-root /opt/boost_1_74_0 @@ -77,7 +74,6 @@ build-mingw.bat | 变量 | 默认值 | 描述 | |------|--------|------| | `VIZ_BUILD_TYPE` | Release | Release 或 Debug | -| `VIZ_LOW_MEMORY` | OFF | 启用低内存节点 | | `VIZ_BUILD_TESTNET` | OFF | Testnet 构建 | | `VIZ_FULL_STATIC` | OFF | 完全静态二进制文件 | | `VIZ_CMAKE_EXTRA` | — | 附加 CMake 标志 | @@ -100,7 +96,6 @@ build-msvc.bat |------|--------|------| | `VIZ_VS_VERSION` | "Visual Studio 17 2022" | Visual Studio 生成器 | | `VIZ_BUILD_TYPE` | Release | 构建类型 | -| `VIZ_LOW_MEMORY` | OFF | 低内存节点 | | `VIZ_BUILD_TESTNET` | OFF | Testnet 构建 | **要求:** Visual Studio 2019+(带"Desktop development with C++"工作负载)、CMake 3.16+。 @@ -125,7 +120,6 @@ build-msvc.bat | 选项 | 默认值 | 描述 | |------|--------|------| | `BUILD_TESTNET` | OFF | 为 testnet 构建 | -| `LOW_MEMORY_NODE` | OFF | 排除非共识数据(减少 RAM) | | `CHAINBASE_CHECK_LOCKING` | OFF | 启用锁检查(仅用于开发) | | `BUILD_SHARED_LIBRARIES` | OFF | 构建共享库 | | `USE_PCH` | OFF | 启用预编译头文件(加速重新构建) | @@ -140,9 +134,6 @@ build-msvc.bat # Release 构建 python3 programs/build_helpers/configure_build.py --release --src ../.. 
-# 带低内存的 Debug -python3 programs/build_helpers/configure_build.py --debug --low-memory - # 使用 MinGW 交叉编译 Windows 版本 python3 programs/build_helpers/configure_build.py --win --release diff --git a/@l10n/zh-CN/docs/node/building.md b/@l10n/zh-CN/docs/node/building.md index 839144dc0c..f40637cbde 100644 --- a/@l10n/zh-CN/docs/node/building.md +++ b/@l10n/zh-CN/docs/node/building.md @@ -46,9 +46,6 @@ chmod +x build-linux.sh ### 常用构建标志 ```bash -# 低内存节点(验证者/种子节点 — 排除历史索引) -./build-linux.sh -l - # 测试网构建 ./build-linux.sh -n @@ -58,9 +55,6 @@ chmod +x build-linux.sh # 并行任务数 ./build-linux.sh -j 8 -# 跳过依赖安装(已安装) -./build-linux.sh --skip-deps - # 自定义 Boost / OpenSSL 路径 ./build-linux.sh --boost-root /opt/boost_1_74_0 --openssl-root /opt/openssl ``` @@ -99,7 +93,6 @@ build-mingw.bat | 变量 | 默认值 | 描述 | |------|-------|------| | `VIZ_BUILD_TYPE` | `Release` | `Release` 或 `Debug` | -| `VIZ_LOW_MEMORY` | `OFF` | `ON` 构建低内存节点 | | `VIZ_BUILD_TESTNET` | `OFF` | `ON` 用于测试网构建 | | `VIZ_FULL_STATIC` | `OFF` | `ON` 构建完全静态二进制文件 | @@ -124,7 +117,6 @@ build-msvc.bat | 选项 | 默认值 | 描述 | |------|-------|------| | `BUILD_TESTNET` | `OFF` | 启用测试网专用代码 | -| `LOW_MEMORY_NODE` | `OFF` | 排除历史/索引插件 | | `CHAINBASE_CHECK_LOCKING` | `OFF` | 启用锁断言检查(debug) | | `BUILD_SHARED_LIBRARIES` | `OFF` | 构建共享库 | | `USE_PCH` | `OFF` | 启用预编译头文件(加快重新构建) | @@ -134,7 +126,6 @@ build-msvc.bat ```bash mkdir build && cd build cmake -DCMAKE_BUILD_TYPE=Release \ - -DLOW_MEMORY_NODE=ON \ -DCMAKE_INSTALL_PREFIX=/usr/local \ .. 
make -j$(nproc) diff --git a/@l10n/zh-CN/docs/node/docker.md b/@l10n/zh-CN/docs/node/docker.md index e8e96a14a9..97ee609668 100644 --- a/@l10n/zh-CN/docs/node/docker.md +++ b/@l10n/zh-CN/docs/node/docker.md @@ -129,10 +129,10 @@ docker build \ ### 各镜像的 CMake 标志 -| 镜像 | `LOW_MEMORY_NODE` | `BUILD_TESTNET` | -|------|:-----------------:|:---------------:| -| production | OFF | OFF | -| testnet | OFF | ON | +| 镜像 | `BUILD_TESTNET` | +|------|:---------------:| +| production | OFF | +| testnet | ON | --- @@ -167,6 +167,59 @@ shared-file-size = 4G --- +## 日志轮转 + +vizd 将所有输出写入 stdout/stderr。Docker 默认的 `json-file` 日志驱动**没有大小限制**——崩溃循环或断言风暴可在数分钟内填满宿主机磁盘(生产环境中曾观察到 35 GB+)。 + +建议改用 `local` 驱动。它以紧凑的二进制格式存储日志并自动轮转。 + +**全局配置(推荐——保护宿主机上的所有容器):** + +文件 `/etc/docker/daemon.json`: +```json +{ + "log-driver": "local", + "log-opts": { + "max-size": "100m", + "max-file": "5" + } +} +``` + +应用配置: + +```bash +sudo systemctl restart docker +``` + +**单容器配置(`docker run`):** + +```bash +docker run -d \ + --log-driver=local \ + --log-opt max-size=100m \ + --log-opt max-file=5 \ + --name vizd \ + vizblockchain/vizd:latest +``` + +**单容器配置(docker-compose):** + +```yaml +services: + vizd: + image: vizblockchain/vizd:latest + logging: + driver: local + options: + max-size: "100m" + max-file: "5" +``` + +> 设置 `max-file: 5` 和 `max-size: 100m` 后,Docker 每个容器最多保留 500 MB 日志,轮转时自动删除最旧的文件。 + +--- + ## 故障排除 | 症状 | 原因 | 解决方案 | @@ -176,3 +229,4 @@ shared-file-size = 4G | 无对等节点 | 防火墙阻止端口 2001 | 开放 2001 TCP 入站 | | 同步缓慢 | 未加载快照 | 首次启动前在卷中提供快照 | | `/var/lib/vizd` 权限拒绝 | 卷所有权不匹配 | `chown -R 1000:1000 /data/vizd` | +| Docker 日志填满磁盘 | `json-file` 驱动没有大小限制 | 配置带 `max-size`/`max-file` 的 `local` 驱动——参见[日志轮转](#日志轮转) | diff --git a/@l10n/zh-CN/docs/node/getting-started.md b/@l10n/zh-CN/docs/node/getting-started.md index e499476b82..41961058c1 100644 --- a/@l10n/zh-CN/docs/node/getting-started.md +++ b/@l10n/zh-CN/docs/node/getting-started.md @@ -172,7 +172,6 @@ curl -s -X POST http://localhost:8090 \ | 全节点 | `config.ini` 
| 所有插件,公共 RPC 端点 | | 验证者 | `config_witness.ini` | 区块生产,RPC 仅限本地 | | 测试网 | `config_testnet.ini` | 开发和测试 | -| 低内存 | `config.ini` + `LOW_MEMORY_NODE` 构建标志 | 仅共识,无历史索引 | --- diff --git a/@l10n/zh-CN/docs/plugins/overview.md b/@l10n/zh-CN/docs/plugins/overview.md index b394601f0e..b9d59cf482 100644 --- a/@l10n/zh-CN/docs/plugins/overview.md +++ b/@l10n/zh-CN/docs/plugins/overview.md @@ -222,6 +222,23 @@ DLT P2P 网络——区块和交易传播、节点管理、少数派 fork 恢复 - `track-account-range` — 索引的账户名范围(默认:所有账户) - `history-count-blocks` — 保留 N 个区块的历史 +> **依赖关系:** `account_history` **需要** `operation_history` 作为父插件 +> (`APPBASE_PLUGIN_REQUIRES`)。若缺少 `operation_history`,节点将无法启动。 +> `account_history` 存储指向 `operation_object` 行的 `operation_id_type` 引用(外键), +> 这些行由 `operation_history` 管理;查询时 `get_account_history` 通过 +> `database.get(itr->op)` 解析这些引用。 +> +> **始终同时启用两个插件:** +> ```ini +> plugin = operation_history +> plugin = account_history +> ``` +> +> **清理协调:** 两个插件从 `config.ini` 读取同一个 `history-count-blocks` 键—— +> 不存在按插件分别设置的机制。设置一次即同时作用于两个插件。 +> 内部实现上,`account_history` 还在每个区块调用 `operation_history::get_min_keep_block()` +> 作为安全检查,确保其条目永远不会引用已被删除的 `operation_object`。 + --- ### `operation_history` diff --git a/@l10n/zh-CN/docs/plugins/webserver.md b/@l10n/zh-CN/docs/plugins/webserver.md index 584d58faa1..e21c91c5d1 100644 --- a/@l10n/zh-CN/docs/plugins/webserver.md +++ b/@l10n/zh-CN/docs/plugins/webserver.md @@ -138,22 +138,6 @@ server { } location / { - # CORS — 允许任意来源(公开 API) - add_header 'Access-Control-Allow-Origin' '*' always; - add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, PATCH, OPTIONS' always; - add_header 'Access-Control-Allow-Headers' 'DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization' always; - add_header 'Access-Control-Expose-Headers' 'Content-Length,Content-Range' always; - - if ($request_method = 'OPTIONS') { - add_header 'Access-Control-Allow-Origin' '*' always; - add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, 
DELETE, PATCH, OPTIONS' always; - add_header 'Access-Control-Allow-Headers' 'DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization' always; - add_header 'Access-Control-Max-Age' 1728000; - add_header 'Content-Type' 'text/plain charset=UTF-8'; - add_header 'Content-Length' 0; - return 204; - } - proxy_pass http://127.0.0.1:8090; proxy_http_version 1.1; diff --git a/@l10n/zh-CN/docs/storage/shared-memory.md b/@l10n/zh-CN/docs/storage/shared-memory.md index c1acd37775..01fdb92abd 100644 --- a/@l10n/zh-CN/docs/storage/shared-memory.md +++ b/@l10n/zh-CN/docs/storage/shared-memory.md @@ -92,10 +92,21 @@ skip-virtual-ops = true 当空闲空间降至 `min-free-shared-file-size` 以下时,数据库自动增长。每次调整大小时: -1. 暂停所有操作(包括区块生产和 API 请求)。 -2. 销毁当前内存映射。 -3. 按 `inc-shared-file-size` 扩展文件。 -4. 重新映射文件并重建所有索引指针。 +1. 写入 `resize_in_progress` 崩溃标记文件。 +2. 将所有脏页刷新到磁盘(`flush()`)。 +3. 暂停所有操作(包括区块生产和 API 请求)。 +4. 销毁当前内存映射。 +5. 按 `inc-shared-file-size` 扩展文件。 +6. 重新映射文件并重建所有索引指针。 +7. 验证关键对象(如 `dynamic_global_property_object`)在重映射后完好无损。 +8. 删除崩溃标记。 + +### 安全机制 + +- **调整前刷新:** 在销毁映射之前将脏页写入磁盘,确保在增长过程中发生任何故障时磁盘上的文件保持一致。 +- **崩溃标记:** 在破坏性重映射之前写入 `resize_in_progress` 文件,成功后删除。如果进程在调整大小期间崩溃,标记会保留并在下次启动时触发自动恢复。 +- **调整后验证:** 重映射后,节点验证 `max_memory()` 是否与预期大小匹配,以及关键对象(如 `dynamic_global_property_object`)是否完好。损坏会被及早发现,而不是导致后续令人困惑的故障。 +- **bad_alloc 安全:** 如果在区块应用期间共享内存耗尽,撤销会话会被安全丢弃(而不是尝试注定失败的撤销,这将通过 `std::terminate` 导致进程崩溃)。延迟调整大小将安排在下一个区块进行。 预先充裕地分配 `shared-file-size` 以最小化调整大小频率。每次调整大小都会导致延迟峰值。 @@ -121,11 +132,12 @@ VIZ 主网全节点的大致使用量: ``` 1. 打开 shared_memory.bin(若 shared-file-size 更大则扩展) 2. 获取独占文件锁 -3. 初始化索引 -4. 若缺少 genesis → init_genesis() -5. 打开 block_log 或 dlt_block_log -6. undo_all() → 回滚到最后一个不可逆区块 -7. 验证头区块与区块日志匹配 +3. 检查 resize_in_progress 崩溃标记 → 若发现则触发恢复 +4. 初始化索引 +5. 若缺少 genesis → init_genesis() +6. 打开 block_log 或 dlt_block_log +7. undo_all() → 回滚到最后一个不可逆区块 +8. 
验证头区块与区块日志匹配 ``` --- @@ -137,6 +149,8 @@ VIZ 主网全节点的大致使用量: | `CRITICAL: validator X account object MISSING` | 损坏 — 使用 `--replay-from-snapshot --snapshot-auto-latest` | | `Could not modify object, uniqueness constraint violated` | 损坏 — 使用 `--replay-from-snapshot --snapshot-auto-latest` | | `Unable to acquire READ lock` | 锁竞争 — 增大 `read-wait-micro` / 启用 `single-write-thread` | +| `Shared memory corrupted: previous resize() crashed` | 中断的调整大小 — 使用 `--replay-from-snapshot --snapshot-auto-latest` | +| `dynamic_global_property_object missing after resize` | 调整后损坏 — 使用 `--replay-from-snapshot --snapshot-auto-latest` | | 节点启动时循环崩溃 | 文件损坏 — `--replay-from-snapshot --snapshot-auto-latest` | 恢复选项: diff --git a/docs/development/building.md b/docs/development/building.md index f25d27de5f..60d74805fb 100644 --- a/docs/development/building.md +++ b/docs/development/building.md @@ -26,10 +26,8 @@ chmod +x build-linux.sh ```bash ./build-linux.sh # Release build (default) -./build-linux.sh -l # LOW_MEMORY_NODE (validator nodes) ./build-linux.sh -n # Testnet build ./build-linux.sh -t Debug -j4 # Debug build with 4 parallel jobs -./build-linux.sh --skip-deps # Skip dependency installation ./build-linux.sh --install # Install to system after build # Custom dependency paths @@ -54,7 +52,6 @@ Requires Xcode Command Line Tools and Homebrew. 
The script installs: `boost`, `c **Options:** ```bash -./build-mac.sh -l # Low-memory node ./build-mac.sh -n # Testnet ./build-mac.sh --skip-deps # Skip Homebrew installs ./build-mac.sh --boost-root /opt/boost_1_74_0 @@ -77,7 +74,6 @@ build-mingw.bat | Variable | Default | Description | |----------|---------|-------------| | `VIZ_BUILD_TYPE` | Release | Release or Debug | -| `VIZ_LOW_MEMORY` | OFF | Enable low-memory node | | `VIZ_BUILD_TESTNET` | OFF | Testnet build | | `VIZ_FULL_STATIC` | OFF | Fully static binary | | `VIZ_CMAKE_EXTRA` | — | Additional CMake flags | @@ -100,7 +96,6 @@ build-msvc.bat |----------|---------|-------------| | `VIZ_VS_VERSION` | "Visual Studio 17 2022" | Visual Studio generator | | `VIZ_BUILD_TYPE` | Release | Build type | -| `VIZ_LOW_MEMORY` | OFF | Low-memory node | | `VIZ_BUILD_TESTNET` | OFF | Testnet build | **Requirements:** Visual Studio 2019+ with "Desktop development with C++" workload, CMake 3.16+. @@ -125,7 +120,6 @@ All Dockerfiles use a two-stage build to minimize image size and use Boost 1.71 | Option | Default | Description | |--------|---------|-------------| | `BUILD_TESTNET` | OFF | Build for testnet | -| `LOW_MEMORY_NODE` | OFF | Exclude non-consensus data (reduces RAM) | | `CHAINBASE_CHECK_LOCKING` | OFF | Enable lock checking (development only) | | `BUILD_SHARED_LIBRARIES` | OFF | Build shared libraries | | `USE_PCH` | OFF | Enable precompiled headers (faster rebuilds) | @@ -140,9 +134,6 @@ Wraps CMake with sensible defaults and cross-compilation support: # Release build python3 programs/build_helpers/configure_build.py --release --src ../.. 
-# Debug with low-memory -python3 programs/build_helpers/configure_build.py --debug --low-memory - # Cross-compile for Windows with MinGW python3 programs/build_helpers/configure_build.py --win --release diff --git a/docs/introduction/key-concepts.md b/docs/introduction/key-concepts.md index 3dd3154150..e6b53d3e26 100644 --- a/docs/introduction/key-concepts.md +++ b/docs/introduction/key-concepts.md @@ -45,6 +45,12 @@ An authority is a multi-sig structure: `{ weight_threshold, account_auths[], key - Created by staking VIZ; withdrawn back to VIZ over 28 intervals (≈28 days) - Not directly transferable; can be delegated to other accounts +### Community Symbol: Ƶ + +The community has chosen **Ƶ** as the short symbol for VIZ. Most wallets, explorers, and applications display it instead of the full ticker. + +It is also common practice to show balances with **2 decimal places** regardless of the underlying token type. Even staked funds (SHARES) are often displayed as `Ƶ` with a note that they are staked in the account, rather than switching to the `SHARES` unit and its 6-decimal format. 
+ --- ## Energy System diff --git a/docs/node/building.md b/docs/node/building.md index 682aff4ed2..f7887aa811 100644 --- a/docs/node/building.md +++ b/docs/node/building.md @@ -46,9 +46,6 @@ Output binary: `build/programs/vizd/vizd` ### Common build flags ```bash -# Low-memory node (validators/seed nodes — excludes history indexing) -./build-linux.sh -l - # Testnet build ./build-linux.sh -n @@ -58,9 +55,6 @@ Output binary: `build/programs/vizd/vizd` # Parallel jobs ./build-linux.sh -j 8 -# Skip dependency installation (already installed) -./build-linux.sh --skip-deps - # Custom Boost / OpenSSL paths ./build-linux.sh --boost-root /opt/boost_1_74_0 --openssl-root /opt/openssl ``` @@ -99,7 +93,6 @@ Optional environment variables: | Variable | Default | Description | |----------|---------|-------------| | `VIZ_BUILD_TYPE` | `Release` | `Release` or `Debug` | -| `VIZ_LOW_MEMORY` | `OFF` | `ON` to build low-memory node | | `VIZ_BUILD_TESTNET` | `OFF` | `ON` for testnet build | | `VIZ_FULL_STATIC` | `OFF` | `ON` for fully static binary | @@ -124,7 +117,6 @@ For direct CMake usage (advanced): | Option | Default | Description | |--------|---------|-------------| | `BUILD_TESTNET` | `OFF` | Enable testnet-specific code | -| `LOW_MEMORY_NODE` | `OFF` | Exclude history/indexing plugins | | `CHAINBASE_CHECK_LOCKING` | `OFF` | Enable lock assertion checks (debug) | | `BUILD_SHARED_LIBRARIES` | `OFF` | Build shared libraries | | `USE_PCH` | `OFF` | Enable precompiled headers (faster rebuilds) | @@ -134,7 +126,6 @@ Example: ```bash mkdir build && cd build cmake -DCMAKE_BUILD_TYPE=Release \ - -DLOW_MEMORY_NODE=ON \ -DCMAKE_INSTALL_PREFIX=/usr/local \ .. 
make -j$(nproc) diff --git a/docs/node/docker.md b/docs/node/docker.md index 3205a19072..2a68a11c94 100644 --- a/docs/node/docker.md +++ b/docs/node/docker.md @@ -129,10 +129,10 @@ docker build \ ### CMake flags per image -| Image | `LOW_MEMORY_NODE` | `BUILD_TESTNET` | -|-------|:-----------------:|:---------------:| -| production | OFF | OFF | -| testnet | OFF | ON | +| Image | `BUILD_TESTNET` | +|-------|:---------------:| +| production | OFF | +| testnet | ON | --- @@ -167,6 +167,59 @@ shared-file-size = 4G --- +## Log Rotation + +vizd writes all output to stdout/stderr. Docker's default `json-file` log driver has **no size limit** — a crash loop or assertion storm can fill the host disk in minutes (35 GB+ observed in production). + +Use the `local` driver instead. It uses a compact binary format and rotates automatically. + +**Global config (recommended — protects all containers on the host).** +Put this in `/etc/docker/daemon.json`; the file must be strict JSON, so do not add comments to it: + +```json +{ + "log-driver": "local", + "log-opts": { + "max-size": "100m", + "max-file": "5" + } +} +``` + +Apply with (only containers created after the restart pick up the new driver; recreate existing ones): + +```bash +sudo systemctl restart docker +``` + +**Per-container (`docker run`):** + +```bash +docker run -d \ + --log-driver=local \ + --log-opt max-size=100m \ + --log-opt max-file=5 \ + --name vizd \ + vizblockchain/vizd:latest +``` + +**Per-container (docker-compose):** + +```yaml +services: + vizd: + image: vizblockchain/vizd:latest + logging: + driver: local + options: + max-size: "100m" + max-file: "5" +``` + +> With `max-file: 5` and `max-size: 100m` Docker keeps at most 500 MB of logs per container and automatically deletes the oldest file when rotating. 
+ +--- + ## Troubleshooting | Symptom | Cause | Fix | @@ -176,3 +229,4 @@ shared-file-size = 4G | No peers | Firewall blocking port 2001 | Open port 2001 TCP inbound | | Slow sync | No snapshot loaded | Provide snapshot in volume before first start | | `Permission denied` on `/var/lib/vizd` | Volume ownership mismatch | `chown -R 1000:1000 /data/vizd` | +| Disk fills up with Docker logs | `json-file` driver has no size limit | Configure `local` driver with `max-size`/`max-file` — see [Log Rotation](#log-rotation) | diff --git a/docs/node/getting-started.md b/docs/node/getting-started.md index b7dae55713..5c8864e78b 100644 --- a/docs/node/getting-started.md +++ b/docs/node/getting-started.md @@ -171,7 +171,6 @@ Check `head_block_number` — it should increase every 3 seconds once synced. | Full node | `config.ini` | All plugins, public RPC endpoints | | Validator | `config_witness.ini` | Block production, RPC on localhost only | | Testnet | `config_testnet.ini` | Development and testing | -| Low-memory | `config.ini` + `LOW_MEMORY_NODE` build flag | Consensus only, no history indexes | --- diff --git a/docs/plugins/overview.md b/docs/plugins/overview.md index 3213aa4869..36c40c15c3 100644 --- a/docs/plugins/overview.md +++ b/docs/plugins/overview.md @@ -210,34 +210,51 @@ Reverse-lookup accounts by public key. --- -### `account_history` +### `operation_history` -Per-account operation history, paginated. +All-operations index for block-level and transaction queries. 
| Method | Description | |--------|-------------| -| `get_account_history(account, from, limit)` | Get operations; `from=-1` returns newest; max 1000 per call | +| `get_ops_in_block(block_num, virtual_ops)` | Operations in a block; `virtual_ops=true` includes virtual ops | +| `get_transaction(tx_id)` | Transaction by ID | **Config options:** -- `track-account-range` — account name range to index (default: all accounts) +- `history-whitelist-ops` / `history-blacklist-ops` — filter which op types are stored +- `history-start-block` — start indexing from this block number - `history-count-blocks` — retain N blocks of history --- -### `operation_history` +### `account_history` -All-operations index for block-level and transaction queries. +Per-account operation history, paginated. | Method | Description | |--------|-------------| -| `get_ops_in_block(block_num, virtual_ops)` | Operations in a block; `virtual_ops=true` includes virtual ops | -| `get_transaction(tx_id)` | Transaction by ID | +| `get_account_history(account, from, limit)` | Get operations; `from=-1` returns newest; max 1000 per call | **Config options:** -- `history-whitelist-ops` / `history-blacklist-ops` — filter which op types are stored -- `history-start-block` — start indexing from this block number +- `track-account-range` — account name range to index (default: all accounts) - `history-count-blocks` — retain N blocks of history +> **Dependency:** `account_history` **requires** `operation_history` as a parent plugin +> (`APPBASE_PLUGIN_REQUIRES`). The node will not start if `operation_history` is absent. +> `account_history` stores `operation_id_type` references (foreign keys) to `operation_object` rows +> managed by `operation_history`; at query time `get_account_history` resolves them via +> `database.get(itr->op)`. 
+> +> **Always enable both plugins together:** +> ```ini +> plugin = operation_history +> plugin = account_history +> ``` +> +> **Purge coordination:** Both plugins read the same `history-count-blocks` key from `config.ini` — +> there is no per-plugin separation. Setting it once applies to both simultaneously. Internally, +> `account_history` also calls `operation_history::get_min_keep_block()` on every block as a safety +> check, ensuring its entries never reference a purged `operation_object`. + --- ### `committee_api` @@ -325,8 +342,8 @@ plugin = database_api plugin = network_broadcast_api plugin = validator_api plugin = account_by_key -plugin = account_history plugin = operation_history +plugin = account_history plugin = committee_api plugin = invite_api plugin = paid_subscription_api diff --git a/docs/plugins/webserver.md b/docs/plugins/webserver.md index c8262af9f9..f18e5a4b9a 100644 --- a/docs/plugins/webserver.md +++ b/docs/plugins/webserver.md @@ -93,6 +93,25 @@ Subscriptions require a persistent WebSocket connection. They are not available --- +## CORS + +The webserver plugin handles browser cross-origin requests natively — no reverse proxy is required for local or development setups. + +**Preflight requests** (`OPTIONS`) are answered immediately with: + +| Response header | Value | +|----------------|-------| +| `Access-Control-Allow-Origin` | `*` | +| `Access-Control-Allow-Methods` | `POST, GET, OPTIONS` | +| `Access-Control-Allow-Headers` | `Content-Type, Authorization` | +| `Access-Control-Max-Age` | `86400` | + +**All other HTTP responses** include `Access-Control-Allow-Origin: *`. + +This allows browser-based wallets and dApps to call the JSON-RPC endpoint directly. For production deployments behind nginx, let the node keep answering CORS and do not add the same headers again at the proxy layer (see [Exposing the API via HTTPS](#exposing-the-api-via-https-nginx--certbot)) — a response that carries `Access-Control-Allow-Origin` twice is rejected by browsers. 
+ +--- + ## Security - **Bind to localhost** (`127.0.0.1`) and use a reverse proxy (nginx/Caddy) for public exposure. Binding to `0.0.0.0` exposes the RPC directly to the network. @@ -137,22 +156,6 @@ server { } location / { - # CORS — allow any origin (public API) - add_header 'Access-Control-Allow-Origin' '*' always; - add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, PATCH, OPTIONS' always; - add_header 'Access-Control-Allow-Headers' 'DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization' always; - add_header 'Access-Control-Expose-Headers' 'Content-Length,Content-Range' always; - - if ($request_method = 'OPTIONS') { - add_header 'Access-Control-Allow-Origin' '*' always; - add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, PATCH, OPTIONS' always; - add_header 'Access-Control-Allow-Headers' 'DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization' always; - add_header 'Access-Control-Max-Age' 1728000; - add_header 'Content-Type' 'text/plain charset=UTF-8'; - add_header 'Content-Length' 0; - return 204; - } - proxy_pass http://127.0.0.1:8090; proxy_http_version 1.1; diff --git a/docs/storage/shared-memory.md b/docs/storage/shared-memory.md index a8e1de74b2..9699b9791b 100644 --- a/docs/storage/shared-memory.md +++ b/docs/storage/shared-memory.md @@ -92,10 +92,21 @@ skip-virtual-ops = true The database auto-grows when free space drops below `min-free-shared-file-size`. Each resize: -1. Pauses all operations (including block production and API requests). -2. Destroys the current memory mapping. -3. Extends the file by `inc-shared-file-size`. -4. Re-maps the file and rebuilds all index pointers. +1. Writes a `resize_in_progress` crash marker file. +2. Flushes all dirty pages to disk (`flush()`). +3. Pauses all operations (including block production and API requests). +4. Destroys the current memory mapping. +5. 
Extends the file by `inc-shared-file-size`. +6. Re-maps the file and rebuilds all index pointers. +7. Validates key objects (e.g., `dynamic_global_property_object`) survived the remap. +8. Removes the crash marker. + +### Safety Mechanisms + +- **Flush-before-resize:** Dirty pages are written to disk before the mapping is destroyed, ensuring the on-disk file is consistent if anything fails during grow. +- **Crash marker:** A `resize_in_progress` file is written before the destructive remap and removed after success. If the process crashes mid-resize, the marker survives and triggers automatic recovery on the next startup. +- **Post-resize validation:** After the remap, the node verifies that `max_memory()` is at least the requested size and that critical objects (e.g., `dynamic_global_property_object`) are intact. Corruption is detected early instead of causing confusing downstream failures. +- **bad_alloc safety:** If shared memory is exhausted during block application, the undo session is rolled back explicitly inside the exception handler and any failure of that rollback is suppressed (rather than letting a second exception during stack unwinding crash the process via `std::terminate`). A deferred resize is scheduled for the next block. Pre-allocate `shared-file-size` generously to minimize resize frequency. Each resize causes a latency spike. @@ -121,11 +132,12 @@ Approximate usage for a VIZ mainnet full node: ``` 1. Open shared_memory.bin (grow if shared-file-size is larger) 2. Acquire exclusive file lock -3. Initialize indices -4. If genesis missing → init_genesis() -5. Open block_log or dlt_block_log -6. undo_all() → rewind to last irreversible block -7. Verify head block matches block log +3. Initialize indices +4. If genesis missing → init_genesis() +5. Open block_log or dlt_block_log +6. Check for resize_in_progress / undo_all_in_progress crash markers → trigger recovery if found +7. undo_all() → rewind to last irreversible block +8. 
Verify head block matches block log ``` --- @@ -137,6 +149,8 @@ Approximate usage for a VIZ mainnet full node: | `CRITICAL: validator X account object MISSING` | Corruption — use `--replay-from-snapshot --snapshot-auto-latest` | | `Could not modify object, uniqueness constraint violated` | Corruption — use `--replay-from-snapshot --snapshot-auto-latest` | | `Unable to acquire READ lock` | Lock contention — increase `read-wait-micro` / enable `single-write-thread` | +| `Shared memory corrupted: previous resize() crashed` | Interrupted resize — use `--replay-from-snapshot --snapshot-auto-latest` | +| `dynamic_global_property_object missing after resize` | Resize corruption — use `--replay-from-snapshot --snapshot-auto-latest` | | Node crashes in a loop on startup | Corrupted file — `--replay-from-snapshot --snapshot-auto-latest` | Recovery options: diff --git a/libraries/chain/database.cpp b/libraries/chain/database.cpp index 10fafcb33b..c5dbe8b1dd 100644 --- a/libraries/chain/database.cpp +++ b/libraries/chain/database.cpp @@ -253,9 +253,46 @@ namespace graphene { namespace chain { } _block_log.open(data_dir / "block_log"); + ilog("block_log opened, head=${h}", ("h", _block_log.head() ? std::to_string(_block_log.head()->block_num()) : std::string("none"))); _dlt_block_log.open(data_dir / "dlt_block_log"); + ilog("dlt_block_log opened, head=${h}", ("h", _dlt_block_log.head() ? std::to_string(_dlt_block_log.head_block_num()) : std::string("none"))); // Rewind all undo state. This should return us to the state at the last irreversible block. + // + // Crash guard: detect incomplete resize from a previous run. + // resize() writes a marker before the destructive grow/remap + // and removes it after success. If the marker survived, the + // shared memory file may be in an inconsistent state (file was + // grown but the mapping was never rebuilt). Treat this the + // same as undo_all corruption and trigger recovery. 
+ auto resize_marker = shared_mem_dir / "resize_in_progress"; + if (boost::filesystem::exists(resize_marker)) { + wlog("Detected incomplete resize from previous startup. " + "Shared memory is likely corrupted. " + "Throwing revision mismatch to trigger recovery."); + FC_THROW_EXCEPTION(database_revision_exception, + "Shared memory corrupted: previous resize() crashed (marker detected)"); + } + + // Crash guard: undo_all() walks shared-memory data structures that may + // be corrupted after a hard crash (SIGSEGV). Since a segfault kills the + // process instantly, no C++ exception handler can catch it. We use a + // marker file to detect that a previous run died inside undo_all() and + // throw database_revision_exception to trigger the recovery path instead. + auto undo_marker = shared_mem_dir / "undo_all_in_progress"; + if (boost::filesystem::exists(undo_marker)) { + wlog("Detected incomplete undo_all from previous startup. " + "Shared memory is likely corrupted. " + "Throwing revision mismatch to trigger recovery."); + FC_THROW_EXCEPTION(database_revision_exception, + "Shared memory corrupted: previous undo_all() crashed (marker detected)"); + } + + // Write marker — will be removed after undo_all() succeeds. + // If the process crashes inside undo_all(), the marker survives + // and triggers recovery on the next startup. + { std::ofstream f(undo_marker.string()); } + ilog("Calling undo_all()..."); // Wrap in a try-catch for boost::interprocess::lock_exception: // After a hard crash, the previous process may have died while holding // shared-memory internal mutexes (e.g., inside managed_mapped_file allocator). @@ -275,8 +312,14 @@ namespace graphene { namespace chain { "Shared memory lock corrupted (previous crash): ${what}", ("what", e.what())); } + // undo_all() completed successfully — remove the crash marker. 
+ boost::filesystem::remove(undo_marker); + ilog("undo_all() completed, revision=${rev} head_block_num=${hbn}", + ("rev", revision())("hbn", head_block_num())); if (revision() != head_block_num()) { + ilog("Revision mismatch: revision=${rev} != head_block_num=${hbn}, calling init_hardforks()", + ("rev", revision())("hbn", head_block_num())); with_strong_read_lock([&]() { init_hardforks(); // Writes to local state, but reads from db }); @@ -290,6 +333,7 @@ namespace graphene { namespace chain { } if (head_block_num()) { + ilog("Validating block log consistency, head_block_num=${h}", ("h", head_block_num())); // Validate DLT block log consistency before seeding fork_db. // After a crash, the DLT block log index/data files can become // truncated (e.g., only 1 block when database has thousands). @@ -303,14 +347,16 @@ namespace graphene { namespace chain { _dlt_block_log.reset(); } + ilog("Reading head block #${n} from block_log", ("n", head_block_num())); auto head_block = _block_log.read_block_by_num(head_block_num()); if (head_block.valid()) { // Block_log has the head block FC_ASSERT(head_block->id() == head_block_id(), "Chain state does not match block log. Please reindex blockchain."); + ilog("Head block found in block_log, starting fork_db and seeding"); _fork_db.start_block(*head_block); - // P22 fix: Seed fork_db with recent blocks (up to 100) + // Seed fork_db with recent blocks (up to 100) // so that incoming sync blocks from peers near our head // can find their parent chain. After restart, fork_db only // has the head block; if peers send blocks a few behind @@ -343,9 +389,8 @@ namespace graphene { namespace chain { } else { // DLT mode: block_log is empty but chainbase has state (loaded from snapshot). set_dlt_mode(true); - wlog("DLT mode detected: block log is empty but database has state at block ${n}. 
" - "Skipping block log validation.", - ("n", head_block_num())); + ilog("DLT mode: block_log empty, seeding fork_db from DLT log for head_block_num=${h}", + ("h", head_block_num())); // Seed fork_db bottom-up from the oldest available DLT block // within a seeding window so that all blocks from oldest to @@ -424,11 +469,14 @@ namespace graphene { namespace chain { wlog("Done opening block log, elapsed time ${t} sec", ("t", double((end - start).count()) / 1000000.0)); } + ilog("Block log open complete, calling init_hardforks()"); with_strong_read_lock([&]() { init_hardforks(); // Writes to local state, but reads from db }); + ilog("init_hardforks() completed"); // === HARDFORK 12: EMERGENCY SCHEDULE RECOVERY === + ilog("Checking validator schedule integrity at head_block_num=${h}", ("h", head_block_num())); // If the node shut down (or crashed) during emergency mode while // update_validator_schedule() had zeroed the schedule but before the // hybrid override could fill it with committee, the schedule may @@ -501,6 +549,15 @@ namespace graphene { namespace chain { _dlt_mode = true; // Set before init_genesis so all subsequent code sees DLT mode + // Clean up undo_all crash marker if present from a previous failed startup. + // chainbase::database::wipe() only removes shared_memory.bin, not other files + // in the directory, so we must do this explicitly. + auto undo_marker = shared_mem_dir / "undo_all_in_progress"; + if (boost::filesystem::exists(undo_marker)) { + wlog("Removing stale undo_all crash marker before snapshot import"); + boost::filesystem::remove(undo_marker); + } + // Always wipe shared memory before snapshot import to ensure clean state. 
// This prevents conflicts if: // - A previous snapshot import attempt failed mid-way @@ -793,7 +850,29 @@ namespace graphene { namespace chain { "\033[33mShared memory growing on block ${block}: actual data ${used_before}M / current ${max_before}M -> new ${mem}M\033[0m", ("block", current_block_num)("mem", new_max / (1024 * 1024)) ("used_before", used_mem_before / (1024 * 1024))("max_before", max_mem / (1024 * 1024))); + dlog("Shared memory resize: flushing segment and remapping ${cur}M -> ${new}M", + ("cur", max_mem / (1024 * 1024))("new", new_max / (1024 * 1024))); resize(new_max); + dlog("Shared memory resize: remap complete, validating segment"); + + // Post-resize validation: verify key objects survived the remap. + // A silent grow failure (file unchanged but open succeeds with + // old size) or corrupted segment metadata would cause later + // operations to fail in confusing ways. Catch it early here. + if (max_memory() < new_max) { + elog("CRITICAL: shared memory resize did not increase capacity! " + "expected=${exp} actual=${act}. File may be corrupted.", + ("exp", new_max)("act", max_memory())); + FC_THROW_EXCEPTION(shared_memory_corruption_exception, + "Resize failed: capacity ${act} < expected ${exp}", + ("act", max_memory())("exp", new_max)); + } + if (!find()) { + elog("CRITICAL: dynamic_global_property_object MISSING after resize. " + "Shared memory is corrupted."); + FC_THROW_EXCEPTION(shared_memory_corruption_exception, + "dynamic_global_property_object missing after resize"); + } uint64_t free_mem = free_memory(); uint64_t reserved_mem = reserved_memory(); @@ -815,6 +894,17 @@ namespace graphene { namespace chain { return; } + // Serialize concurrent resize attempts: the P2P thread (push_block) + // and the validator thread (generate_block) both call this before + // acquiring their respective write locks. 
Without this mutex both + // can see _pending_resize==true simultaneously, both pass + // begin_resize_barrier(), and both call resize() concurrently — + // corrupting the chainbase segment (double-resize race). + std::unique_lock resize_entry_guard(_apply_resize_mutex); + if (!_pending_resize) { + return; // another thread completed the resize while we waited + } + // Use the resize barrier to pause ALL database operations. // This is stronger than with_strong_write_lock: it also blocks // lockless reads (e.g. get_slot_at_time, get_scheduled_validator, @@ -836,7 +926,26 @@ namespace graphene { namespace chain { ilog("\033[33mApplying deferred shared memory resize: actual data ${used_before}M / current ${max_before}M -> new ${mem}M\033[0m", ("used_before", used_mem_before / (1024 * 1024))("max_before", max_mem_before / (1024 * 1024)) ("mem", target / (1024 * 1024))); + dlog("Shared memory resize: flushing segment and remapping ${cur}M -> ${new}M", + ("cur", max_mem_before / (1024 * 1024))("new", target / (1024 * 1024))); resize(target); + dlog("Shared memory resize: remap complete, validating segment"); + + // Post-resize validation: verify key objects survived the remap. + if (max_memory() < target) { + elog("CRITICAL: deferred shared memory resize did not increase capacity! " + "expected=${exp} actual=${act}. File may be corrupted.", + ("exp", target)("act", max_memory())); + FC_THROW_EXCEPTION(shared_memory_corruption_exception, + "Deferred resize failed: capacity ${act} < expected ${exp}", + ("act", max_memory())("exp", target)); + } + if (!find()) { + elog("CRITICAL: dynamic_global_property_object MISSING after deferred resize. 
" + "Shared memory is corrupted."); + FC_THROW_EXCEPTION(shared_memory_corruption_exception, + "dynamic_global_property_object missing after resize"); + } uint64_t free_mem = free_memory(); uint64_t reserved_mem = reserved_memory(); @@ -897,6 +1006,16 @@ namespace graphene { namespace chain { void database::wipe(const fc::path &data_dir, const fc::path &shared_mem_dir, bool include_blocks) { close(); chainbase::database::wipe(shared_mem_dir); + // Remove undo_all crash marker if present (chainbase::wipe only removes shared_memory.bin) + auto undo_marker = shared_mem_dir / "undo_all_in_progress"; + if (boost::filesystem::exists(undo_marker)) { + boost::filesystem::remove(undo_marker); + } + // Remove resize crash marker if present + auto resize_marker = shared_mem_dir / "resize_in_progress"; + if (boost::filesystem::exists(resize_marker)) { + boost::filesystem::remove(resize_marker); + } if (include_blocks) { fc::remove_all(data_dir / "block_log"); fc::remove_all(data_dir / "block_log.index"); @@ -2042,9 +2161,37 @@ namespace graphene { namespace chain { } try { - auto session = start_undo_session(); - apply_block(new_block, skip); - session.push(); + // Heap-allocate the undo session so we can explicitly + // destroy it before exception unwinding reaches it. + // If bad_alloc fires inside apply_block(), the session + // destructor would call undo() which writes to shared + // memory. With memory exhausted, undo() throws another + // bad_alloc during stack unwinding -> double exception + // -> std::terminate. By resetting the unique_ptr in + // our catch block, the session is destroyed cleanly + // before the exception propagates further. + auto session = std::unique_ptr( + new chainbase::database::session(start_undo_session())); + try { + apply_block(new_block, skip); + } catch (const fc::exception& e) { + // fc::exception is the actual type thrown by apply_block() + // (FC_CAPTURE_AND_RETHROW wraps std::exception into fc::exception). 
+ // Same bad_alloc guard as the std::exception handler below. + try { session.reset(); } catch (...) {} + throw; + } catch (const std::exception& e) { + // Attempt explicit undo before rethrowing. If undo() + // throws (shared memory exhausted), suppress it — the + // chainbase session destructor's uncaught_exceptions() + // guard will NOT fire here (we're in a catch block, not + // in stack unwinding), so we must protect manually. + // The original exception is preserved and rethrown. + try { session.reset(); } catch (...) {} + throw; + } + session->push(); + session.reset(); } catch (const wrong_scheduled_validator_exception &e) { // Schedule mismatch: keep the block in fork_db as a @@ -2250,6 +2397,15 @@ namespace graphene { namespace chain { // while we hold raw pointers/references into the mapped segment. // The guard is scoped so it is released before with_strong_write_lock // (which acquires its own operation guard internally). + // Diagnostic fields saved from validator_obj for the locked re-check below. + // validator_obj is a shared-memory reference valid only inside the op_guard scope; + // copy before the scope ends so we can log them if the re-check confirms missing. + bool lockless_validator_account_missing = false; + public_key_type diag_signing_key; + uint32_t diag_total_missed = 0; + uint16_t diag_penalty = 0; + uint32_t diag_last_confirmed = 0; + size_t diag_account_index_size = 0; { auto op_guard = make_operation_guard(); @@ -2260,30 +2416,48 @@ namespace graphene { namespace chain { const auto &validator_obj = get_validator(validator_owner); - // Pre-check: ensure the validator account exists before generating the block. - // If the account is missing from the database (shared memory corruption), - // the block will be produced but fail to apply internally (process_funds - // calls get_account which would throw "unknown key"). 
- const auto* validator_acct = find_account(validator_owner); - if (!validator_acct) { - auto& acc_idx = get_index().indices().get(); + if (!(skip & skip_validator_signature)) + FC_ASSERT(validator_obj.signing_key == + block_signing_private_key.get_public_key()); + + // Lockless hint: does the validator account exist? + // op_guard coordinates with resize only — it does NOT exclude writers. + // A concurrent P2P writer rebalancing the by_name red-black tree can make + // this lookup transiently return null for an existing key. Save the hint + // and re-verify under a read lock AFTER this scope (op_guard released), + // to avoid a deadlock: with_strong_read_lock() nests its own op_guard, and + // if resize set _resize_in_progress between now and then, the nested + // enter_operation() would wait for resize while resize waits for us → deadlock. + if (!find_account(validator_owner)) { + lockless_validator_account_missing = true; + diag_signing_key = validator_obj.signing_key; + diag_total_missed = validator_obj.total_missed; + diag_penalty = validator_obj.penalty_percent; + diag_last_confirmed = validator_obj.last_confirmed_block_num; + diag_account_index_size = + get_index().indices().get().size(); + } + } // op_guard released here + + // Re-verify suspected missing account under a read lock, which blocks writers + // and yields a consistent index traversal. If still missing — real corruption. + if (lockless_validator_account_missing) { + with_strong_read_lock([&]() { + if (find_account(validator_owner)) { + return; // false alarm: lockless read raced a concurrent P2P writer + } elog("CRITICAL: Validator ${w} account object MISSING from database! " "This is impossible state - shared memory may be corrupted. 
" "signing_key=${k} total_missed=${m} penalty=${p} last_confirmed=${lc} " "account_index_size=${idx_size}", - ("w", validator_owner)("k", validator_obj.signing_key) - ("m", validator_obj.total_missed)("p", validator_obj.penalty_percent) - ("lc", validator_obj.last_confirmed_block_num) - ("idx_size", acc_idx.size())); + ("w", validator_owner)("k", diag_signing_key) + ("m", diag_total_missed)("p", diag_penalty) + ("lc", diag_last_confirmed)("idx_size", diag_account_index_size)); FC_THROW_EXCEPTION(shared_memory_corruption_exception, "CRITICAL: Validator ${w} account not found in database! Shared memory corruption suspected.", ("w", validator_owner)); - } - - if (!(skip & skip_validator_signature)) - FC_ASSERT(validator_obj.signing_key == - block_signing_private_key.get_public_key()); - } // op_guard released here + }); + } // Second operation guard covers all remaining lockless reads // in this function: get_dynamic_global_properties(), head_block_id(), @@ -5500,7 +5674,7 @@ namespace graphene { namespace chain { ("w", validator_missed.owner) ("n", head_block_num() + i + 1) ("t", get_slot_time(i + 1)) - ("next", b.validator)); + ("next", get_scheduled_validator(i + 2))); } modify(validator_missed, [&](validator_object &w) { diff --git a/libraries/chain/include/graphene/chain/database.hpp b/libraries/chain/include/graphene/chain/database.hpp index ae58e89bd8..74df6f6274 100644 --- a/libraries/chain/include/graphene/chain/database.hpp +++ b/libraries/chain/include/graphene/chain/database.hpp @@ -12,7 +12,9 @@ #include +#include #include +#include namespace graphene { namespace chain { @@ -677,8 +679,14 @@ namespace graphene { namespace chain { uint32_t _block_num_check_free_memory = 1000; - bool _pending_resize = false; - size_t _pending_resize_target = 0; + std::atomic _pending_resize{false}; + std::atomic _pending_resize_target{0}; + // Serializes concurrent apply_pending_resize() calls from the + // validator thread and the P2P thread. 
Both call it before their + // respective write locks, so without this mutex both threads can + // see _pending_resize==true simultaneously and double-resize, + // corrupting the chainbase segment. + std::mutex _apply_resize_mutex; bool _skip_virtual_ops = false; bool _enable_plugins_on_push_transaction = false; diff --git a/libraries/network/dlt_p2p_node.cpp b/libraries/network/dlt_p2p_node.cpp index 766dc495aa..a05d6448a5 100644 --- a/libraries/network/dlt_p2p_node.cpp +++ b/libraries/network/dlt_p2p_node.cpp @@ -143,13 +143,30 @@ void dlt_p2p_node::start() { // Start periodic task fiber if (_thread) { _periodic_fiber = _thread->async([this]() { + uint32_t consecutive_errors = 0; + fc::time_point last_error_log; while (_running) { try { fc::usleep(fc::seconds(5)); if (!_running) break; periodic_task(); + consecutive_errors = 0; } catch (const fc::exception& e) { - elog("Error in DLT P2P periodic task: ${e}", ("e", e.to_detail_string())); + consecutive_errors++; + auto now = fc::time_point::now(); + if (consecutive_errors == 1 || (now - last_error_log).count() > 60 * 1000000LL) { + elog("Error in DLT P2P periodic task (#${n}): ${e}", + ("n", consecutive_errors)("e", e.to_detail_string())); + last_error_log = now; + } + } catch (const std::exception& e) { + consecutive_errors++; + auto now = fc::time_point::now(); + if (consecutive_errors == 1 || (now - last_error_log).count() > 60 * 1000000LL) { + elog("Error in DLT P2P periodic task (#${n}): ${e}", + ("n", consecutive_errors)("e", std::string(e.what()))); + last_error_log = now; + } } } }, "dlt periodic_task"); @@ -285,7 +302,7 @@ void dlt_p2p_node::connect_to_peer(const fc::ip::endpoint& ep) { // Send hello send_message(pid, message(build_hello_message())); - ilog(DLT_LOG_GREEN "Connected to peer ${ep}, sent DLT hello" DLT_LOG_RESET, ("ep", ep)); + dlog(DLT_LOG_GREEN "Connected to peer ${ep}, sent DLT hello" DLT_LOG_RESET, ("ep", ep)); // Start read loop as a fiber on the p2p thread start_read_loop(pid); @@ 
-342,23 +359,29 @@ void dlt_p2p_node::handle_disconnect(peer_id peer, const std::string& reason, bo } } - // Cancel read fiber — cancel_and_wait yields, allowing drain_send_queue - // to resume on this thread. The reentrancy guard above ensures that - // reentrant handle_disconnect call returns immediately without touching - // _peer_states, so state/it remain valid when we resume here. - auto fiber_it = _read_fibers.find(peer); - if (fiber_it != _read_fibers.end()) { - try { if (fiber_it->second.valid()) fiber_it->second.cancel_and_wait(__FUNCTION__); } catch (...) {} - _read_fibers.erase(fiber_it); - } - - // Close connection + // Close the socket FIRST — this immediately unblocks any pending readsome/writesome + // in the read fiber and drain_send_queue fiber, causing them to throw and exit. + // drain_send_queue holds sock by owning shared_ptr copy, so erasing _connections + // here does not leave it with a dangling reference. + // If we closed AFTER cancel_and_wait, the fiber would be stuck waiting for network + // I/O that can never arrive on a dead peer — causing a multi-second hang per peer, + // and a full deadlock when N peers disconnect simultaneously (p82: silent reboot). auto conn_it = _connections.find(peer); if (conn_it != _connections.end()) { try { if (conn_it->second) conn_it->second->close(); } catch (...) {} _connections.erase(conn_it); } + // Cancel read fiber — cancel_and_wait yields, but the fiber exits immediately + // because its socket I/O is already unblocked by the close() above. + // The reentrancy guard above ensures that reentrant handle_disconnect calls + // return immediately without touching _peer_states, so state/it remain valid. + auto fiber_it = _read_fibers.find(peer); + if (fiber_it != _read_fibers.end()) { + try { if (fiber_it->second.valid()) fiber_it->second.cancel_and_wait(__FUNCTION__); } catch (...) 
{} + _read_fibers.erase(fiber_it); + } + // Clear send guard and drain any queued messages _peer_sending.erase(peer); state.send_queue.clear(); @@ -514,6 +537,10 @@ void dlt_p2p_node::send_message(peer_id peer, const message& msg) { ++state.send_queue_total; } else { // Queue is at max depth — peer can't consume data fast enough. + // Skip if disconnect is already in progress: handle_disconnect yields + // at cancel_and_wait, allowing other fibers to call send_message for + // the same peer, which would re-enter this branch and spam the log. + if (_disconnect_in_progress.count(peer)) return; // Capture info before handle_disconnect potentially erases the state. std::string ep = std::string(state.endpoint); uint32_t dropped = state.send_queue_dropped; @@ -538,7 +565,7 @@ void dlt_p2p_node::drain_send_queue(peer_id peer, std::vector buf) { _peer_sending.erase(peer); return; } - auto& sock = conn_it->second; + auto sock = conn_it->second; // owning copy — handle_disconnect may erase _connections while we yield in writesome // Cache endpoint before entering the try block — handle_disconnect may // remove the peer from _peer_states before the catch block runs, making @@ -1770,7 +1797,7 @@ void dlt_p2p_node::on_dlt_peer_exchange_reply(peer_id peer, const dlt_peer_excha void dlt_p2p_node::on_dlt_peer_exchange_rate_limited(peer_id peer, const dlt_peer_exchange_rate_limited& msg) { auto it = _peer_states.find(peer); auto ep = (it != _peer_states.end()) ? 
std::string(it->second.endpoint) : std::to_string(peer); - ilog(DLT_LOG_DGRAY "Peer ${ep} rate-limited our exchange request, wait ${w}s" DLT_LOG_RESET, + dlog(DLT_LOG_DGRAY "Peer ${ep} rate-limited our exchange request, wait ${w}s" DLT_LOG_RESET, ("ep", ep)("w", msg.wait_seconds)); // Record the rate-limit locally so periodic_peer_exchange() stops @@ -1949,9 +1976,18 @@ void dlt_p2p_node::on_dlt_gap_fill_reply(peer_id peer, const dlt_gap_fill_reply& uint32_t our_head_now = _delegate->get_head_block_num(); uint32_t peer_latest = std::max(it->second.peer_dlt_latest, it->second.peer_head_num); if (peer_latest > our_head_now) { + uint32_t our_lib = _delegate->get_lib_block_num(); wlog(DLT_LOG_ORANGE "Gap fill: dead-fork block #${n} from peer ${ep} (peer=#${p} > our head #${h})" - " — our fork is losing, re-syncing from LIB instead of banning" DLT_LOG_RESET, - ("n", block.block_num())("ep", it->second.endpoint)("p", peer_latest)("h", our_head_now)); + " — our fork is losing, next gap fill will start from LIB #${lib}" DLT_LOG_RESET, + ("n", block.block_num())("ep", it->second.endpoint)("p", peer_latest) + ("h", our_head_now)("lib", our_lib)); + // Set override so request_gap_fill() starts from LIB on the + // next call: blocks from LIB include the divergence point and + // land in fork_db as FORK_DB_ONLY, allowing a fork switch once + // the majority chain reaches sufficient length. + if (our_lib > 0 && our_lib < our_head_now) { + _gap_fill_fork_override_start = our_lib; + } transition_to_sync(); request_blocks_from_peer(peer); } else { @@ -2028,6 +2064,19 @@ void dlt_p2p_node::request_gap_fill() { uint32_t our_head = _delegate->get_head_block_num(); if (our_head == 0) return; + // Fork-losing override: when on_dlt_gap_fill_repl detected that our fork + // is losing (dead-fork block, peer is ahead), it sets this to our LIB. 
+ // Starting from LIB instead of our_head ensures the request covers the + // divergence point; the majority-chain blocks land in fork_db as + // FORK_DB_ONLY and eventually trigger a fork switch. One-shot: cleared + // after use so normal gap fill resumes from our_head on subsequent calls. + if (_gap_fill_fork_override_start > 0 && _gap_fill_fork_override_start < our_head) { + ilog(DLT_LOG_ORANGE "Gap fill fork override: starting from LIB #${lib} instead of head #${h} to find majority chain divergence" DLT_LOG_RESET, + ("lib", _gap_fill_fork_override_start)("h", our_head)); + our_head = _gap_fill_fork_override_start; + } + _gap_fill_fork_override_start = 0; + // Gap fill works in both FORWARD and SYNC modes. // In SYNC mode, when request_blocks_from_peer() can't bridge a gap // (blocks below the syncing peer's DLT range), gap fill provides an @@ -2121,21 +2170,16 @@ void dlt_p2p_node::request_gap_fill() { ("ep", peer_state.endpoint)("ph", peer_state.peer_head_num)("ex", peer_state.exchange_enabled)); send_message(any_active_peer, message(req)); } else { - // P39 fix: No peer at all with a higher head — gap fill - // can't help. Transition to SYNC immediately instead of - // waiting for stagnation detection. - wlog("Gap fill: no peer available — transitioning to SYNC"); + // No peer with a higher head is available for gap fill. + // Do NOT transition to SYNC — without a peer ahead of us, + // request_blocks_from_peer() would immediately see all peers + // as "caught up" and call transition_to_forward(), producing + // rapid SYNC→FORWARD oscillation. Instead, just log and let + // the periodic task retry when new peers connect or existing + // peers advance their head. 
+ wlog("Gap fill: no peer available with higher head — waiting for peers"); _gap_fill_in_progress = false; _gap_fill_start_time = fc::time_point(); - transition_to_sync(); - // Request blocks from all active peers - for (const auto& _pi : _peer_states) { - const auto& state = _pi.second; - if (state.lifecycle_state == DLT_PEER_LIFECYCLE_ACTIVE || - state.lifecycle_state == DLT_PEER_LIFECYCLE_SYNCING) { - request_blocks_from_peer(_pi.first); - } - } } } @@ -2386,6 +2430,14 @@ void dlt_p2p_node::resume_block_processing() { run_resume_on_p2p_thread(); } +void dlt_p2p_node::reset_peers_after_recovery() { + // Called from the P2P thread after auto-recovery completes. + // Clears all soft-bans so peers that were banned before the + // corruption (and may carry the majority fork) can reconnect + // and serve blocks immediately. + emergency_peer_reset(); +} + bool dlt_p2p_node::is_on_majority_fork() const { return _fork_status != DLT_FORK_STATUS_MINORITY; } @@ -3358,10 +3410,18 @@ void dlt_p2p_node::block_validation_timeout() { void dlt_p2p_node::periodic_task() { // Non-DB-access housekeeping always runs. 
- periodic_reconnect_check(); - periodic_lifecycle_timeout_check(); - block_validation_timeout(); - periodic_mempool_cleanup(); + try { periodic_reconnect_check(); } + catch (const fc::exception& e) { wlog("periodic_reconnect_check: ${e}", ("e", e.to_detail_string())); } + catch (const std::exception& e) { wlog("periodic_reconnect_check: ${e}", ("e", std::string(e.what()))); } + try { periodic_lifecycle_timeout_check(); } + catch (const fc::exception& e) { wlog("periodic_lifecycle_timeout_check: ${e}", ("e", e.to_detail_string())); } + catch (const std::exception& e) { wlog("periodic_lifecycle_timeout_check: ${e}", ("e", std::string(e.what()))); } + try { block_validation_timeout(); } + catch (const fc::exception& e) { wlog("block_validation_timeout: ${e}", ("e", e.to_detail_string())); } + catch (const std::exception& e) { wlog("block_validation_timeout: ${e}", ("e", std::string(e.what()))); } + try { periodic_mempool_cleanup(); } + catch (const fc::exception& e) { wlog("periodic_mempool_cleanup: ${e}", ("e", e.to_detail_string())); } + catch (const std::exception& e) { wlog("periodic_mempool_cleanup: ${e}", ("e", std::string(e.what()))); } // When block processing is paused (snapshot creation in progress), // skip periodic operations that need database read locks. The snapshot @@ -3369,85 +3429,110 @@ void dlt_p2p_node::periodic_task() { // lock from this fiber would time out and cascade into peer disconnections. if (_block_processing_paused) { // Still check banned peers for unban -- no DB access needed. - for (auto& _peer_item : _peer_states) { - auto& state = _peer_item.second; - if (state.lifecycle_state == DLT_PEER_LIFECYCLE_BANNED) { - auto ban_dur = (state.ban_duration_sec > 0) ? 
state.ban_duration_sec : BAN_DURATION_SEC; - auto elapsed = fc::time_point::now() - state.state_entered_time; - if (elapsed.count() > ban_dur * 1000000) { - state.lifecycle_state = DLT_PEER_LIFECYCLE_DISCONNECTED; - state.disconnected_since = fc::time_point::now(); - state.next_reconnect_attempt = fc::time_point::now() + fc::seconds(30); - ilog("Unbanning peer ${ep}", ("ep", state.endpoint)); + try { + for (auto& _peer_item : _peer_states) { + auto& state = _peer_item.second; + if (state.lifecycle_state == DLT_PEER_LIFECYCLE_BANNED) { + auto ban_dur = (state.ban_duration_sec > 0) ? state.ban_duration_sec : BAN_DURATION_SEC; + auto elapsed = fc::time_point::now() - state.state_entered_time; + if (elapsed.count() > ban_dur * 1000000) { + state.lifecycle_state = DLT_PEER_LIFECYCLE_DISCONNECTED; + state.disconnected_since = fc::time_point::now(); + state.next_reconnect_attempt = fc::time_point::now() + fc::seconds(30); + ilog("Unbanning peer ${ep}", ("ep", state.endpoint)); + } } } - } + } catch (const fc::exception& e) { wlog("unban_check(paused): ${e}", ("e", e.to_detail_string())); } + catch (const std::exception& e) { wlog("unban_check(paused): ${e}", ("e", std::string(e.what()))); } return; } // Normal path: all periodic operations run. 
- sync_stagnation_check(); - check_sync_catchup(); // P26 fix: periodic catch-up detection - check_forward_behind(); // P27 fix: detect falling behind in FORWARD mode - check_forward_stagnation(); // P37 fix: detect head stuck in FORWARD mode - request_gap_fill(); // P36 fix: fill gaps via exchange-enabled peers - periodic_peer_exchange(); + try { sync_stagnation_check(); } + catch (const fc::exception& e) { wlog("sync_stagnation_check: ${e}", ("e", e.to_detail_string())); } + catch (const std::exception& e) { wlog("sync_stagnation_check: ${e}", ("e", std::string(e.what()))); } + try { check_sync_catchup(); } // P26 fix: periodic catch-up detection + catch (const fc::exception& e) { wlog("check_sync_catchup: ${e}", ("e", e.to_detail_string())); } + catch (const std::exception& e) { wlog("check_sync_catchup: ${e}", ("e", std::string(e.what()))); } + try { check_forward_behind(); } // P27 fix: detect falling behind in FORWARD mode + catch (const fc::exception& e) { wlog("check_forward_behind: ${e}", ("e", e.to_detail_string())); } + catch (const std::exception& e) { wlog("check_forward_behind: ${e}", ("e", std::string(e.what()))); } + try { check_forward_stagnation(); } // P37 fix: detect head stuck in FORWARD mode + catch (const fc::exception& e) { wlog("check_forward_stagnation: ${e}", ("e", e.to_detail_string())); } + catch (const std::exception& e) { wlog("check_forward_stagnation: ${e}", ("e", std::string(e.what()))); } + try { request_gap_fill(); } // P36 fix: fill gaps via exchange-enabled peers + catch (const fc::exception& e) { wlog("request_gap_fill: ${e}", ("e", e.to_detail_string())); } + catch (const std::exception& e) { wlog("request_gap_fill: ${e}", ("e", std::string(e.what()))); } + try { periodic_peer_exchange(); } + catch (const fc::exception& e) { wlog("periodic_peer_exchange: ${e}", ("e", e.to_detail_string())); } + catch (const std::exception& e) { wlog("periodic_peer_exchange: ${e}", ("e", std::string(e.what()))); } // Post-pause catchup: drain 
queued blocks and/or clear the flag // when caught up. if (_catchup_after_pause && _delegate) { - // If there are still queued blocks, drain them first - if (!_paused_block_queue.empty()) { - drain_paused_block_queue(); - } + try { + // If there are still queued blocks, drain them first + if (!_paused_block_queue.empty()) { + drain_paused_block_queue(); + } - // After drain (or if queue was empty), check if we're still behind - uint32_t our_head = _delegate->get_head_block_num(); - bool any_ahead = false; - for (const auto& _pi : _peer_states) { - const auto& s = _pi.second; - if ((s.lifecycle_state == DLT_PEER_LIFECYCLE_ACTIVE || - s.lifecycle_state == DLT_PEER_LIFECYCLE_SYNCING) && - s.peer_head_num > our_head) { - any_ahead = true; - break; + // After drain (or if queue was empty), check if we're still behind + uint32_t our_head = _delegate->get_head_block_num(); + bool any_ahead = false; + for (const auto& _pi : _peer_states) { + const auto& s = _pi.second; + if ((s.lifecycle_state == DLT_PEER_LIFECYCLE_ACTIVE || + s.lifecycle_state == DLT_PEER_LIFECYCLE_SYNCING) && + s.peer_head_num > our_head) { + any_ahead = true; + break; + } } - } - if (!any_ahead) { - _catchup_after_pause = false; - ilog(DLT_LOG_GREEN "Post-pause catchup complete, no gap remaining (head=#${h})" DLT_LOG_RESET, - ("h", our_head)); - } + if (!any_ahead) { + _catchup_after_pause = false; + ilog(DLT_LOG_GREEN "Post-pause catchup complete, no gap remaining (head=#${h})" DLT_LOG_RESET, + ("h", our_head)); + } + } catch (const fc::exception& e) { wlog("catchup_after_pause: ${e}", ("e", e.to_detail_string())); } + catch (const std::exception& e) { wlog("catchup_after_pause: ${e}", ("e", std::string(e.what()))); } } // Log node status every 1 minute (12 cycles at 5s) _status_log_counter++; if (_status_log_counter >= 12) { _status_log_counter = 0; - log_node_status(); + try { log_node_status(); } + catch (const fc::exception& e) { wlog("log_node_status: ${e}", ("e", e.to_detail_string())); } + catch 
(const std::exception& e) { wlog("log_node_status: ${e}", ("e", std::string(e.what()))); } } // Log peer stats at configured interval (counter tracks seconds, ticks are 5s) _stats_log_counter += 5; if (_stats_log_counter >= _stats_log_interval_sec) { _stats_log_counter = 0; - log_peer_stats(); + try { log_peer_stats(); } + catch (const fc::exception& e) { wlog("log_peer_stats: ${e}", ("e", e.to_detail_string())); } + catch (const std::exception& e) { wlog("log_peer_stats: ${e}", ("e", std::string(e.what()))); } } // Check banned peers for unban - for (auto& _peer_item : _peer_states) { - auto& state = _peer_item.second; - if (state.lifecycle_state == DLT_PEER_LIFECYCLE_BANNED) { - auto ban_dur = (state.ban_duration_sec > 0) ? state.ban_duration_sec : BAN_DURATION_SEC; - auto elapsed = fc::time_point::now() - state.state_entered_time; - if (elapsed.count() > ban_dur * 1000000) { - state.lifecycle_state = DLT_PEER_LIFECYCLE_DISCONNECTED; - state.disconnected_since = fc::time_point::now(); - state.next_reconnect_attempt = fc::time_point::now() + fc::seconds(30); - ilog("Unbanning peer ${ep}", ("ep", state.endpoint)); + try { + for (auto& _peer_item : _peer_states) { + auto& state = _peer_item.second; + if (state.lifecycle_state == DLT_PEER_LIFECYCLE_BANNED) { + auto ban_dur = (state.ban_duration_sec > 0) ? 
state.ban_duration_sec : BAN_DURATION_SEC; + auto elapsed = fc::time_point::now() - state.state_entered_time; + if (elapsed.count() > ban_dur * 1000000) { + state.lifecycle_state = DLT_PEER_LIFECYCLE_DISCONNECTED; + state.disconnected_since = fc::time_point::now(); + state.next_reconnect_attempt = fc::time_point::now() + fc::seconds(30); + ilog("Unbanning peer ${ep}", ("ep", state.endpoint)); + } } } - } + } catch (const fc::exception& e) { wlog("unban_check: ${e}", ("e", e.to_detail_string())); } + catch (const std::exception& e) { wlog("unban_check: ${e}", ("e", std::string(e.what()))); } } // ── Accept loop ───────────────────────────────────────────────── diff --git a/libraries/network/include/graphene/network/dlt_p2p_node.hpp b/libraries/network/include/graphene/network/dlt_p2p_node.hpp index 02ed289789..1af47924b8 100644 --- a/libraries/network/include/graphene/network/dlt_p2p_node.hpp +++ b/libraries/network/include/graphene/network/dlt_p2p_node.hpp @@ -143,6 +143,7 @@ class dlt_p2p_node { void reconnect_seeds(); void pause_block_processing(); void resume_block_processing(); + void reset_peers_after_recovery(); // ── Our node state ─────────────────────────────────────────── dlt_node_status get_node_status() const { return _node_status; } @@ -387,6 +388,13 @@ class dlt_p2p_node { static constexpr uint32_t GAP_FILL_TIMEOUT_SEC = 15; ///< Max seconds to wait for gap fill reply uint32_t _highest_seen_block_num = 0; ///< Highest block num seen from any source + // When "our fork is losing" is detected in gap fill reply, this is set to + // our LIB so the next request_gap_fill() starts from LIB instead of + // our_head. Blocks from LIB onward include the divergence point, giving + // fork_db the majority chain blocks it needs to trigger a fork switch. + // Reset to 0 after use (one-shot). 
+ uint32_t _gap_fill_fork_override_start = 0; + // ── Gap fill rejection tracking ────────────────────────────── uint32_t _gap_rejected_block_num = 0; ///< Last block num rejected by gap fill uint32_t _gap_rejected_count = 0; ///< How many times that block was rejected diff --git a/plugins/account_by_key/account_by_key_plugin.cpp b/plugins/account_by_key/account_by_key_plugin.cpp index 8ffbac4191..89dbc6272c 100644 --- a/plugins/account_by_key/account_by_key_plugin.cpp +++ b/plugins/account_by_key/account_by_key_plugin.cpp @@ -200,8 +200,8 @@ namespace graphene { namespace plugins { namespace account_by_key { my.reset(new account_by_key_plugin_impl(*this)); graphene::chain::database &db = appbase::app().get_plugin().db(); - db.pre_apply_operation.connect([&](operation_notification &o) { my->pre_operation(o); }); - db.post_apply_operation.connect([&](const operation_notification &o) { my->post_operation(o); }); + my->_pre_op_conn = db.pre_apply_operation.connect([&](operation_notification &o) { my->pre_operation(o); }); + my->_post_op_conn = db.post_apply_operation.connect([&](const operation_notification &o) { my->post_operation(o); }); add_plugin_index(db); JSON_RPC_REGISTER_API ( name() ) ; @@ -217,7 +217,8 @@ namespace graphene { namespace plugins { namespace account_by_key { void account_by_key_plugin::plugin_shutdown() { ilog("account_by_key plugin: plugin_shutdown() begin"); - + my->_pre_op_conn.disconnect(); + my->_post_op_conn.disconnect(); ilog("account_by_key plugin: plugin_shutdown() end"); } diff --git a/plugins/account_by_key/include/graphene/plugins/account_by_key/account_by_key_plugin.hpp b/plugins/account_by_key/include/graphene/plugins/account_by_key/account_by_key_plugin.hpp index 0d15169ff1..27664079d5 100644 --- a/plugins/account_by_key/include/graphene/plugins/account_by_key/account_by_key_plugin.hpp +++ b/plugins/account_by_key/include/graphene/plugins/account_by_key/account_by_key_plugin.hpp @@ -71,6 +71,9 @@ namespace graphene { 
account_by_key_plugin &_self; graphene::chain::database &_db; + + boost::signals2::connection _pre_op_conn; + boost::signals2::connection _post_op_conn; }; std::unique_ptr my; diff --git a/plugins/account_history/plugin.cpp b/plugins/account_history/plugin.cpp index a6108530a5..0d0c60b5cd 100644 --- a/plugins/account_history/plugin.cpp +++ b/plugins/account_history/plugin.cpp @@ -228,7 +228,15 @@ if( options.count(name) ) { \ std::map result; for (; itr != end; ++itr) { - result[itr->sequence] = database.get(itr->op); + // Guard against dangling operation_object references: + // operation_history may purge operation_objects before account_history + // purges the corresponding account_history_objects (e.g., different history-count-blocks). + // If the referenced object no longer exists, skip it silently. + auto op_obj = database.find(itr->op); + if (!op_obj) { + continue; + } + result[itr->sequence] = applied_operation(*op_obj); } return result; } @@ -530,6 +538,8 @@ if( options.count(name) ) { \ "Defines a range of accounts to track as a json pair [\"from\",\"to\"] [from,to]. " "Can be specified multiple times" ); + // history-count-blocks is registered by operation_history plugin and shared; + // account_history reads it in plugin_initialize() without re-registering. cfg.add(cli); } diff --git a/plugins/chain/include/graphene/plugins/chain/plugin.hpp b/plugins/chain/include/graphene/plugins/chain/plugin.hpp index e0c58cd8c1..7aa5390d00 100644 --- a/plugins/chain/include/graphene/plugins/chain/plugin.hpp +++ b/plugins/chain/include/graphene/plugins/chain/plugin.hpp @@ -96,6 +96,10 @@ namespace graphene { /// until this returns false. bool is_syncing() const; + /// Returns true while auto-recovery from shared memory corruption is running. + /// The webserver plugin uses this to return 503 instead of touching the database. + bool is_recovering() const; + /// Explicitly clear the syncing flag. 
Called by the P2P /// layer when sync completes (all peers report zero /// unfetched items) so that the witness plugin can resume diff --git a/plugins/chain/plugin.cpp b/plugins/chain/plugin.cpp index 4ed946d25d..f2dc26e36d 100644 --- a/plugins/chain/plugin.cpp +++ b/plugins/chain/plugin.cpp @@ -65,6 +65,7 @@ namespace chain { bool sync_start_logged = false; // guard to log sync start only once std::atomic currently_syncing{false}; // true while processing P2P sync blocks + std::atomic recovery_in_progress{false}; // true while auto-recovery is running bool pending_snapshot_load = false; // set when snapshot args present but callback not yet registered @@ -139,7 +140,7 @@ namespace chain { // particularly when the emergency master receives blocks from a // competing fork that have gap=0 but previous != head_block_id. if (block_applied) { - currently_syncing.store(currently_syncing_flag, std::memory_order_relaxed); + currently_syncing.store(currently_syncing_flag, std::memory_order_release); if (currently_syncing_flag) { if (!sync_start_logged) { ilog("\033[92m>>> Syncing Blockchain started from block #${n} (head: ${head})\033[0m", @@ -280,11 +281,15 @@ namespace chain { } bool plugin::is_syncing() const { - return my->currently_syncing.load(std::memory_order_relaxed); + return my->currently_syncing.load(std::memory_order_acquire); + } + + bool plugin::is_recovering() const { + return my->recovery_in_progress.load(std::memory_order_acquire); } void plugin::clear_syncing() { - if (my->currently_syncing.exchange(false, std::memory_order_relaxed)) { + if (my->currently_syncing.exchange(false, std::memory_order_acq_rel)) { ilog("Sync complete: cleared currently_syncing flag (validator block production may resume)"); my->sync_start_logged = false; } @@ -631,7 +636,9 @@ namespace chain { try { ilog("Opening shared memory from ${path}", ("path", my->shared_memory_dir.generic_string())); my->db.open(data_dir, my->shared_memory_dir, CHAIN_INIT_SUPPLY, my->shared_memory_size, 
chainbase::database::read_write/*, my->validate_invariants*/ ); + ilog("db.open() completed successfully, head_block_num=${h}", ("h", my->db.head_block_num())); auto head_block_log = my->db.get_block_log().head(); + ilog("block_log head=${h}", ("h", head_block_log ? std::to_string(head_block_log->block_num()) : std::string("none"))); my->replay |= head_block_log && my->db.revision() != head_block_log->block_num(); if (my->replay) { @@ -883,14 +890,40 @@ namespace chain { } void plugin::attempt_auto_recovery() { - static std::atomic recovery_in_progress{false}; + static constexpr int MAX_CONSECUTIVE_RECOVERIES = 3; + static constexpr int RECOVERY_COOLDOWN_SEC = 300; // 5 minutes + static int consecutive_recoveries = 0; + static fc::time_point last_recovery_time; + bool expected = false; - if (!recovery_in_progress.compare_exchange_strong(expected, true)) { + if (!my->recovery_in_progress.compare_exchange_strong(expected, true)) { wlog("Auto-recovery already in progress, skipping duplicate attempt"); return; } - wlog("=== IMMEDIATE AUTO-RECOVERY: shared memory corruption detected ==="); + // Guard against infinite recovery loops: if the same block keeps + // failing after recovery, the snapshot or block log may be corrupted. + // Reset the counter after a cooldown period to allow eventual retry. + auto now = fc::time_point::now(); + if (last_recovery_time != fc::time_point() && + (now - last_recovery_time).to_seconds() > RECOVERY_COOLDOWN_SEC) { + consecutive_recoveries = 0; + } + consecutive_recoveries++; + last_recovery_time = now; + + if (consecutive_recoveries > MAX_CONSECUTIVE_RECOVERIES) { + elog("Auto-recovery limit reached: ${n} consecutive attempts within ${c}s cooldown. " + "The snapshot or block log may be corrupted — manual intervention required. 
" + "Try a fresh snapshot or delete the block log.", + ("n", consecutive_recoveries)("c", RECOVERY_COOLDOWN_SEC)); + my->recovery_in_progress.store(false, std::memory_order_release); + appbase::app().quit(); + return; + } + + wlog("=== IMMEDIATE AUTO-RECOVERY: shared memory corruption detected (attempt ${n}/${max}) ===", + ("n", consecutive_recoveries)("max", MAX_CONSECUTIVE_RECOVERIES)); // 1. Find latest snapshot fc::path snap = my->find_latest_snapshot(); @@ -926,7 +959,7 @@ namespace chain { } // Mark syncing so witness plugin defers block production during recovery. - my->currently_syncing.store(true, std::memory_order_relaxed); + my->currently_syncing.store(true, std::memory_order_release); wlog("Auto-recovery: closing database and recovering from snapshot ${p}...", ("p", snap.string())); @@ -946,18 +979,40 @@ namespace chain { wlog("=== AUTO-RECOVERY COMPLETE: node resumed at block ${n} ===", ("n", my->db.head_block_num())); + // Recovery is complete: clear the syncing flag so the validator + // plugin can resume block production once the post-pause catchup + // window closes. The DLT P2P delegate calls db.push_block() + // directly and bypasses plugin_impl::accept_block(), so the + // flag-update path that would otherwise self-clear this on the + // next applied block never runs on the DLT path. Without this + // explicit reset, the flag set above stays true forever and + // is_syncing() permanently gates production with not_synced. + // The remaining catchup window is gated by _catchup_after_pause + // in the P2P layer, which clears itself once peers are no longer + // ahead of our head. + my->currently_syncing.store(false, std::memory_order_release); + // 5. Resume P2P now that the database is fully rebuilt. // do_snapshot_load(is_recovery=true) already set LIB = head // so P2P will request blocks after the snapshot head. 
try { auto* p2p_plug = appbase::app().find_plugin(); if (p2p_plug && p2p_plug->get_state() == appbase::abstract_plugin::started) { + // Clear soft-bans BEFORE resuming so that peers banned + // before the corruption (which may carry the majority fork) + // can reconnect and serve blocks immediately after recovery. + p2p_plug->reset_peers_after_recovery(); p2p_plug->resume_block_processing(); wlog("Auto-recovery: P2P block processing resumed"); } } catch (...) { wlog("Auto-recovery: failed to resume P2P"); } + + // Allow future recovery attempts. Without this reset the + // atomic stays true forever and any subsequent corruption + // event is silently discarded, leaving the node permanently stuck. + my->recovery_in_progress.store(false, std::memory_order_release); } catch (const fc::exception& e) { elog("Auto-recovery FAILED during snapshot load: ${e}", ("e", e.to_detail_string())); appbase::app().quit(); diff --git a/plugins/custom_protocol_api/custom_protocol_api.cpp b/plugins/custom_protocol_api/custom_protocol_api.cpp index 693a305e9f..c475c5c1e0 100644 --- a/plugins/custom_protocol_api/custom_protocol_api.cpp +++ b/plugins/custom_protocol_api/custom_protocol_api.cpp @@ -58,6 +58,7 @@ namespace graphene { namespace plugins { namespace custom_protocol_api { } uint8_t custom_protocol_store_size = 10; + boost::signals2::connection _post_op_conn; private: graphene::chain::database& database_; @@ -70,6 +71,9 @@ namespace graphene { namespace plugins { namespace custom_protocol_api { void custom_protocol_api_plugin::plugin_shutdown() { wlog("custom_protocol_api plugin: plugin_shutdown()"); + if (pimpl) { + pimpl->_post_op_conn.disconnect(); + } } const std::string& custom_protocol_api_plugin::name() { @@ -93,7 +97,7 @@ namespace graphene { namespace plugins { namespace custom_protocol_api { void custom_protocol_api_plugin::plugin_initialize(const boost::program_options::variables_map& options) { pimpl = std::make_unique(); auto& db = pimpl->database(); - 
db.post_apply_operation.connect([&](const operation_notification& note) { + pimpl->_post_op_conn = db.post_apply_operation.connect([&](const operation_notification& note) { pimpl->on_operation(note); }); add_plugin_index(db); diff --git a/plugins/database_api/api.cpp b/plugins/database_api/api.cpp index cecf8dc128..778fdb2c8d 100755 --- a/plugins/database_api/api.cpp +++ b/plugins/database_api/api.cpp @@ -130,6 +130,8 @@ struct plugin::api_impl final { block_applied_callback_info::cont active_block_applied_callback; block_applied_callback_info::cont free_block_applied_callback; + boost::signals2::connection _applied_block_conn; + private: graphene::chain::database &_db; @@ -912,7 +914,7 @@ void plugin::plugin_initialize(const boost::program_options::variables_map &opti ilog("database_api plugin: plugin_initialize() begin"); my = std::make_unique(); JSON_RPC_REGISTER_API(plugin_name) - my->database().applied_block.connect([this](const protocol::signed_block &) { + my->_applied_block_conn = my->database().applied_block.connect([this](const protocol::signed_block &) { this->clear_block_applied_callback(); }); ilog("database_api plugin: plugin_initialize() end"); @@ -922,4 +924,10 @@ void plugin::plugin_startup() { my->startup(); } +void plugin::plugin_shutdown() { + if (my) { + my->_applied_block_conn.disconnect(); + } +} + } } } // graphene::plugins::database_api diff --git a/plugins/database_api/include/graphene/plugins/database_api/plugin.hpp b/plugins/database_api/include/graphene/plugins/database_api/plugin.hpp index 654e266ca0..b2f5c8a126 100755 --- a/plugins/database_api/include/graphene/plugins/database_api/plugin.hpp +++ b/plugins/database_api/include/graphene/plugins/database_api/plugin.hpp @@ -196,7 +196,7 @@ class plugin final : public appbase::plugin { void plugin_startup() override; - void plugin_shutdown() override{} + void plugin_shutdown() override; plugin(); diff --git a/plugins/network_broadcast_api/network_broadcast_api.cpp 
b/plugins/network_broadcast_api/network_broadcast_api.cpp index e41f05d0b7..43a1e96ad2 100644 --- a/plugins/network_broadcast_api/network_broadcast_api.cpp +++ b/plugins/network_broadcast_api/network_broadcast_api.cpp @@ -181,6 +181,7 @@ namespace graphene { } void network_broadcast_api_plugin::plugin_shutdown() { + on_applied_block_connection.disconnect(); } void network_broadcast_api_plugin::on_applied_block(const signed_block &b) { try { diff --git a/plugins/p2p/include/graphene/plugins/p2p/p2p_plugin.hpp b/plugins/p2p/include/graphene/plugins/p2p/p2p_plugin.hpp index 4099b58c2e..c6956f1129 100644 --- a/plugins/p2p/include/graphene/plugins/p2p/p2p_plugin.hpp +++ b/plugins/p2p/include/graphene/plugins/p2p/p2p_plugin.hpp @@ -136,6 +136,12 @@ namespace graphene { */ void clear_catchup_flag(); + /** + * Clear all soft-bans after auto-recovery so peers that + * carry the majority fork can reconnect immediately. + */ + void reset_peers_after_recovery(); + private: std::unique_ptr my; }; diff --git a/plugins/p2p/p2p_plugin.cpp b/plugins/p2p/p2p_plugin.cpp index 942c2dd3e2..632918072c 100644 --- a/plugins/p2p/p2p_plugin.cpp +++ b/plugins/p2p/p2p_plugin.cpp @@ -747,6 +747,12 @@ void p2p_plugin::clear_catchup_flag() { if (my->node) my->node->clear_catchup_after_pause(); } +void p2p_plugin::reset_peers_after_recovery() { + my->p2p_thread.async([this]() { + if (my->node) my->node->reset_peers_after_recovery(); + }); +} + } // namespace p2p } // namespace plugins } // namespace graphene diff --git a/plugins/snapshot/include/graphene/plugins/snapshot/plugin.hpp b/plugins/snapshot/include/graphene/plugins/snapshot/plugin.hpp index c9aa6eef15..a50ffa20f0 100644 --- a/plugins/snapshot/include/graphene/plugins/snapshot/plugin.hpp +++ b/plugins/snapshot/include/graphene/plugins/snapshot/plugin.hpp @@ -92,6 +92,11 @@ namespace graphene { namespace plugins { namespace snapshot { /// snapshot serialization (avoids write-lock contention). 
bool is_snapshot_in_progress() const; + /// Returns true while a snapshot is being loaded (hot-reload during stalled sync detection). + /// During this phase the database holds a WRITE lock, so incoming RPC reads will time out. + /// The webserver plugin uses this to return 503 instead of attempting database reads. + bool is_snapshot_reloading() const; + private: class plugin_impl; std::unique_ptr my; diff --git a/plugins/snapshot/plugin.cpp b/plugins/snapshot/plugin.cpp index f89b3dc81b..f65ce9a3c8 100644 --- a/plugins/snapshot/plugin.cpp +++ b/plugins/snapshot/plugin.cpp @@ -975,6 +975,18 @@ fc::mutable_variant_object snapshot_plugin::plugin_impl::serialize_state() { EXPORT_INDEX(account_authority_index, account_authority_object, "account_authority") EXPORT_INDEX(validator_index, validator_object, "validator") EXPORT_INDEX(validator_vote_index, validator_vote_object, "validator_vote") + // Sanity: if validators exist but votes are absent, the chainbase type enum + // likely shifted (types added/removed before validator_vote_object_type). + // This would silently corrupt the snapshot. + { + auto n_validators = state["validator"].get_array().size(); + auto n_votes = state["validator_vote"].get_array().size(); + if (n_validators > 0 && n_votes == 0) + wlog("SNAPSHOT INTEGRITY: ${v} validators but 0 validator votes — " + "validator_vote_index may be empty due to chainbase type-enum mismatch. 
" + "Snapshot will be INCOMPLETE.", + ("v", n_validators)); + } EXPORT_INDEX(block_summary_index, block_summary_object, "block_summary") EXPORT_INDEX(content_index, content_object, "content") EXPORT_INDEX(content_vote_index, content_vote_object, "content_vote") @@ -1560,6 +1572,14 @@ void snapshot_plugin::plugin_impl::load_snapshot(const fc::path& input_path) { if (state.contains("validator_vote")) { auto n = detail::import_validator_votes(db, state["validator_vote"].get_array()); ilog(CLOG_ORANGE "Imported ${n} validator votes" CLOG_RESET, ("n", n)); + // Defensive fallback: validator_vote was present but empty; the snapshot may have + // been produced from a chainbase DB with a type-enum mismatch (see export warning). + // If an old witness_vote key also exists with data, use it to recover. + if (n == 0 && state.contains("witness_vote")) { + auto n2 = detail::import_validator_votes(db, state["witness_vote"].get_array()); + if (n2 > 0) + ilog(CLOG_ORANGE "Imported ${n} validator votes (recovered from witness_vote)" CLOG_RESET, ("n", n2)); + } } else if (state.contains("witness_vote")) { // backward compat: old snapshots used "witness_vote" key auto n = detail::import_validator_votes(db, state["witness_vote"].get_array()); @@ -1953,26 +1973,38 @@ void snapshot_plugin::plugin_impl::on_applied_block(const graphene::protocol::si // deferred. We do NOT re-check is_validator_producing_soon() here to avoid an // infinite deferral loop where the validator is always scheduled soon. // - // Instead, we wait for the specific validator slot to be filled: the deferred - // snapshot only fires once head_block_time() >= pending_snapshot_safe_after_time, - // meaning the validator's block has been produced and applied (or the slot was - // missed and the chain moved past it). This prevents the snapshot from starting - // while the validator is about to produce. 
+ // We wait for a block to be applied that is STRICTLY AFTER the validator slot + // we deferred for: head_block_time() > pending_snapshot_safe_after_time. + // + // Why strictly greater (not >=): + // The applied_block signal is dispatched synchronously inside _push_block, + // BEFORE generate_block() returns to the validator and BEFORE the validator + // calls p2p().broadcast_block(). If we fired the snapshot on the same block + // the local validator just produced, the snapshot read-lock could start + // before the produced block has been broadcast to peers. + // + // Requiring head_block_time > slot_time means we wait until a SUBSEQUENT + // block is applied. Normally that block was produced by another validator + // on top of ours, showing our block was produced, applied locally, and + // propagated; otherwise the slot was missed and the chain moved past it. + // Either way it is then safe to start the snapshot read pass. + // + // Cost: ~one block interval of additional delay, but only when the local + // validator was the deferral target. When we are not the producer, the + // snapshot fires immediately at the originating block (no deferral path). if (snapshot_pending && !is_syncing) { // If safe_after_time is epoch (lookup failed), fire immediately as fallback. - // If head_block_time has reached/passed the validator slot time, the block - // at that slot has been applied (or the slot was skipped by a gap).
bool safe_to_fire = (pending_snapshot_safe_after_time == fc::time_point_sec()) || - (db.head_block_time() >= pending_snapshot_safe_after_time); + (db.head_block_time() > pending_snapshot_safe_after_time); if (safe_to_fire) { fc::path output(pending_snapshot_path); snapshot_pending = false; pending_snapshot_path.clear(); pending_snapshot_safe_after_time = fc::time_point_sec(); - ilog(CLOG_GREEN "Creating deferred snapshot now (validator slot passed): ${p}" CLOG_RESET, ("p", output.string())); + ilog(CLOG_GREEN "Creating deferred snapshot now (validator slot passed and block broadcast): ${p}" CLOG_RESET, ("p", output.string())); schedule_async_snapshot(output, "deferred"); } else { - dlog("Deferred snapshot waiting for validator slot at ${t} (head_block_time=${h})", + dlog("Deferred snapshot waiting for block strictly after validator slot ${t} (head_block_time=${h})", ("t", pending_snapshot_safe_after_time)("h", db.head_block_time())); } } @@ -4099,4 +4131,9 @@ bool snapshot_plugin::is_snapshot_in_progress() const { return my->snapshot_in_progress.load(std::memory_order_relaxed); } +bool snapshot_plugin::is_snapshot_reloading() const { + if (!my) return false; + return my->_snapshot_reloading.load(std::memory_order_acquire); +} + } } } // graphene::plugins::snapshot diff --git a/plugins/validator/validator.cpp b/plugins/validator/validator.cpp index ea87e41ee5..e53664bd6a 100644 --- a/plugins/validator/validator.cpp +++ b/plugins/validator/validator.cpp @@ -208,6 +208,8 @@ namespace graphene { // Updated in the applied_block signal handler. uint64_t _last_applied_block_num = 0; + boost::signals2::connection _applied_block_connection; + // Protects cross-thread diagnostic fields shared between // production_io_thread_ and the P2P thread (on_block_applied / // get_production_diagnostics). Never held during database() calls. 
@@ -368,7 +370,7 @@ namespace graphene { // Connect to applied_block signal to detect missed slots // that belong to our validators and log diagnostic state. pimpl->_last_applied_block_num = d.head_block_num(); - d.applied_block.connect([this](const graphene::chain::signed_block &block) { + pimpl->_applied_block_connection = d.applied_block.connect([this](const graphene::chain::signed_block &block) { pimpl->on_block_applied(block); }); @@ -387,6 +389,7 @@ namespace graphene { void validator_plugin::plugin_shutdown() { graphene::time::shutdown_ntp_time(); if (!pimpl->_validators.empty()) { + pimpl->_applied_block_connection.disconnect(); ilog("shutting downing production timer"); // Stop the dedicated io_service so the production thread exits. // io_service::stop() is thread-safe; it causes run() to return @@ -1274,8 +1277,17 @@ namespace graphene { // - No blocks arrive to clear the syncing flag // - The production loop is the only path to advance the chain if (db._debug_block_production) ilog("DEBUG_CRASH: getting dgp"); - const auto &dgp = db.get_dynamic_global_properties(); - if (db._debug_block_production) ilog("DEBUG_CRASH: dgp ok, head=${h} emergency=${e}", ("h", dgp.head_block_number)("e", dgp.emergency_consensus_active)); + // Copy emergency_consensus_active under a read lock so the local bool + // stays valid even if a concurrent P2P resize remaps the segment between + // here and the op_guard that is created further below. Keeping a raw + // const& into shared memory without an op_guard is a dangling-reference + // risk: begin_resize_barrier() can complete while _active_operations==0 + // and unmap the old segment before we dereference the field.
+ bool emergency_active = false; + db.with_weak_read_lock([&]() { + emergency_active = db.get_dynamic_global_properties().emergency_consensus_active; + }); + if (db._debug_block_production) ilog("DEBUG_CRASH: dgp ok, head=${h} emergency=${e}", ("h", db.head_block_num())("e", emergency_active)); // === DLT MODE: DEFER PRODUCTION DURING ACTIVE SYNC === // In DLT mode, the validator must not produce blocks while the @@ -1301,7 +1313,7 @@ namespace graphene { // while the network is catching up. if (db._dlt_mode && chain().is_syncing()) { bool we_are_emergency_master = - dgp.emergency_consensus_active && + emergency_active && _validators.find(CHAIN_EMERGENCY_VALIDATOR_ACCOUNT) != _validators.end(); if (!we_are_emergency_master) { return block_validation_condition::not_synced; @@ -1347,7 +1359,7 @@ namespace graphene { if (db._debug_block_production) ilog("DEBUG_CRASH: checking hardfork12 and emergency path"); if (db.has_hardfork(CHAIN_HARDFORK_12)) { - if (dgp.emergency_consensus_active) { + if (emergency_active) { // EMERGENCY MODE: auto-bypass both stale and participation checks // for the emergency master only. The master holds the // emergency-private-key and MUST produce to avoid deadlock. @@ -1498,7 +1510,7 @@ namespace graphene { // continue producing (bootstrap / testnet / recovery scenario). // With enable-stale-production=false (default): we're on the wrong fork, // pop back to LIB and resync from the P2P network. - if (!dgp.emergency_consensus_active) { + if (!emergency_active) { auto fork_head = db.get_fork_db().head(); if (fork_head) { bool all_ours = true; @@ -1561,7 +1573,7 @@ namespace graphene { // "ours" is expected — other nodes sync from us. Skip minority fork // detection entirely to avoid false positives and the production // deadlock that would otherwise occur. - if (dgp.emergency_consensus_active && db._dlt_mode) { + if (emergency_active && db._dlt_mode) { // If committee is in the schedule and we have its key, WE are the // emergency master. 
All blocks being "ours" is expected -- other // nodes sync from us. Skip minority fork detection to prevent @@ -1849,7 +1861,7 @@ namespace graphene { bool has_competing_block = false; graphene::chain::item_ptr competing_block; - if (dgp.emergency_consensus_active) { + if (emergency_active) { // During emergency mode: ANY block at this height is competing. // Multiple nodes with the emergency key may have produced. // Defer to the deterministic hash-based resolution in fork_db. @@ -2007,7 +2019,7 @@ namespace graphene { // Roll back to LIB and resync from P2P network. elog("unlinkable_block_exception during block generation: fork_db broken. " "Rolling back to LIB and resyncing from P2P network."); - p2p().resync_from_lib(dgp.emergency_consensus_active /*force_emergency*/); + p2p().resync_from_lib(emergency_active /*force_emergency*/); _minority_fork_recovering = true; _minority_fork_recovery_start = fc::time_point::now(); return block_validation_condition::minority_fork; diff --git a/plugins/webserver/CMakeLists.txt b/plugins/webserver/CMakeLists.txt index 280015586b..181adf51e5 100644 --- a/plugins/webserver/CMakeLists.txt +++ b/plugins/webserver/CMakeLists.txt @@ -28,6 +28,7 @@ target_link_libraries( graphene::json_rpc graphene_chain graphene::chain_plugin + graphene_snapshot appbase fc) target_include_directories(graphene_${CURRENT_TARGET} diff --git a/plugins/webserver/webserver_plugin.cpp b/plugins/webserver/webserver_plugin.cpp index 3c4aadd63b..0bd710ab9b 100644 --- a/plugins/webserver/webserver_plugin.cpp +++ b/plugins/webserver/webserver_plugin.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include @@ -224,6 +225,8 @@ namespace graphene { asio::io_service::work thread_pool_work; plugins::json_rpc::plugin *api; + chain::plugin *chain_plugin = nullptr; + snapshot::snapshot_plugin *snap_plugin = nullptr; boost::signals2::connection chain_sync_con; boost::signals2::connection applied_block_conn; @@ -359,6 +362,13 @@ namespace graphene { auto con = 
server->get_con_from_hdl(hdl); thread_pool_ios.post([con, msg, this]() { try { + if ((chain_plugin && chain_plugin->is_recovering()) || + (snap_plugin && snap_plugin->is_snapshot_reloading())) { + auto ec = con->send("{\"jsonrpc\":\"2.0\",\"error\":{\"code\":-32003,\"message\":\"Node is recovering from shared memory corruption, please retry later\"},\"id\":null}"); + if (ec) throw websocketpp::exception(ec); + return; + } + if (msg->get_opcode() == websocketpp::frame::opcode::text) { auto body = msg->get_payload(); @@ -421,10 +431,31 @@ namespace graphene { auto con = server->get_con_from_hdl(hdl); con->defer_http_response(); + // CORS preflight + if (con->get_request().get_method() == "OPTIONS") { + con->append_header("Access-Control-Allow-Origin", "*"); + con->append_header("Access-Control-Allow-Methods", "POST, GET, OPTIONS"); + con->append_header("Access-Control-Allow-Headers", "Content-Type, Authorization"); + con->append_header("Access-Control-Max-Age", "86400"); + con->set_status(websocketpp::http::status_code::ok); + try { con->send_http_response(); } catch (...) {} + return; + } + thread_pool_ios.post([con, this]() { + if ((chain_plugin && chain_plugin->is_recovering()) || + (snap_plugin && snap_plugin->is_snapshot_reloading())) { + con->append_header("Access-Control-Allow-Origin", "*"); + con->set_body("{\"jsonrpc\":\"2.0\",\"error\":{\"code\":-32003,\"message\":\"Node is recovering from shared memory corruption, please retry later\"},\"id\":null}"); + con->set_status(websocketpp::http::status_code::service_unavailable); + try { con->send_http_response(); } catch (...) {} + return; + } + auto body = con->get_request_body(); if (body.empty()) { + con->append_header("Access-Control-Allow-Origin", "*"); con->set_body("empty request body"); con->set_status(websocketpp::http::status_code::bad_request); try { con->send_http_response(); } catch (...) 
{} @@ -439,12 +470,14 @@ namespace graphene { // Invalid JSON — skip cache, let json_rpc handle the error try { api->call(body, [con](const std::string &data){ + con->append_header("Access-Control-Allow-Origin", "*"); con->set_body(data); con->set_status(websocketpp::http::status_code::ok); con->send_http_response(); }); } catch (fc::exception &e) { edump((e)); + con->append_header("Access-Control-Allow-Origin", "*"); con->set_body("Could not call API"); con->set_status(websocketpp::http::status_code::not_found); try { con->send_http_response(); } catch (...) {} @@ -466,6 +499,7 @@ namespace graphene { if (cached_response.valid()) { // Patch the id in cached response to match request std::string patched = patch_response_id(*cached_response, request_id); + con->append_header("Access-Control-Allow-Origin", "*"); con->set_body(patched); con->set_status(websocketpp::http::status_code::ok); con->send_http_response(); @@ -477,6 +511,7 @@ namespace graphene { api->call(body, [con, this, request_hash, cacheable](const std::string &data){ // this lambda can be called from any thread in application // for example, when task was delegated ( see msg_pack(msg_pack&&) ) + con->append_header("Access-Control-Allow-Origin", "*"); con->set_body(data); con->set_status(websocketpp::http::status_code::ok); con->send_http_response(); @@ -489,6 +524,7 @@ namespace graphene { } catch (fc::exception &e) { // this case happens if exception was thrown on parsing request edump((e)); + con->append_header("Access-Control-Allow-Origin", "*"); con->set_body("Could not call API"); con->set_status(websocketpp::http::status_code::not_found); // this sending response can't be merged with sending response from try-block @@ -591,7 +627,10 @@ namespace graphene { my->api = appbase::app().find_plugin(); FC_ASSERT(my->api != nullptr, "Could not find API Register Plugin"); - chain::plugin *chain = appbase::app().find_plugin(); + my->chain_plugin = appbase::app().find_plugin(); + my->snap_plugin = 
appbase::app().find_plugin(); + + chain::plugin *chain = my->chain_plugin; if (chain != nullptr && chain->get_state() != appbase::abstract_plugin::started) { ilog("Waiting for chain plugin to start"); my->chain_sync_con = chain->on_sync.connect([this]() { @@ -613,6 +652,8 @@ namespace graphene { } void webserver_plugin::plugin_shutdown() { + my->chain_sync_con.disconnect(); + my->applied_block_conn.disconnect(); my->stop_webserver(); } diff --git a/share/vizd/config/config.ini b/share/vizd/config/config.ini index c29dd224b4..32139779ce 100644 --- a/share/vizd/config/config.ini +++ b/share/vizd/config/config.ini @@ -118,9 +118,6 @@ history-count-blocks = 57600 # Defines starting block from which recording stats by the account_history and operation_history plugin. history-start-block = 70000000 -# Set the maximum size of cached feed for an account -follow-max-feed-size = 500 - # name of validator controlled by this node (e.g. initwitness ) # validator = # # validator = # DEPRECATED: use 'validator' diff --git a/share/vizd/config/config_debug.ini b/share/vizd/config/config_debug.ini index 25c31e30de..7d26c2f992 100644 --- a/share/vizd/config/config_debug.ini +++ b/share/vizd/config/config_debug.ini @@ -73,7 +73,7 @@ inc-shared-file-size = 100M # and resizes. The optimal strategy is do checking of the free space, but not very often. 
block-num-check-free-size = 10 # each 30 seconds -plugin = chain p2p json_rpc webserver network_broadcast_api validator test_api database_api private_message follow social_network tags account_by_key account_history operation_history block_info raw_block debug_node witness_api +plugin = chain p2p json_rpc webserver network_broadcast_api validator test_api database_api account_by_key account_history operation_history block_info raw_block debug_node # Remove votes before defined block, should increase performance clear-votes-before-block = 0 # don't clear votes @@ -93,12 +93,6 @@ skip-virtual-ops = false # Defines starting block from which recording stats by the account_history plugin. # history-start-block = -# Set the maximum size of cached feed for an account -follow-max-feed-size = 500 - -# Defines a range of accounts to private messages to/from as a json pair ["from","to"] [from,to) -# pm-account-range = - # Enable block production, even if the chain is stale. enable-stale-production = true diff --git a/share/vizd/config/config_debug_mongo.ini b/share/vizd/config/config_debug_mongo.ini deleted file mode 100644 index 7377448471..0000000000 --- a/share/vizd/config/config_debug_mongo.ini +++ /dev/null @@ -1,143 +0,0 @@ -# Endpoint for P2P node to listen on -# p2p-endpoint = - -# Maxmimum number of incoming connections on P2P endpoint -# p2p-max-connections = - -# P2P nodes to connect to on startup (may specify multiple times) -# p2p-seed-node = - -# Enable stale sync detection: when no blocks are received for the configured timeout, -# the node resets sync from the last irreversible block and reconnects all seed peers. -# p2p-stale-sync-detection = false - -# Timeout in seconds before stale sync detection triggers recovery (default: 120 = 2 minutes). -# p2p-stale-sync-timeout-seconds = 120 - -# Pairs of [BLOCK_NUM,BLOCK_ID] that should be enforced as checkpoints. -# checkpoint = - -# Number of threads for rpc-clients. 
Optimal value `-1` -webserver-thread-pool-size = 2 - -# IP:PORT for HTTP connections -webserver-http-endpoint = 0.0.0.0:8090 - -# IP:PORT for WebSocket connections -webserver-ws-endpoint = 0.0.0.0:8091 - -# Maximum microseconds for trying to get read lock -read-wait-micro = 500000 - -# Maximum retries to get read lock. Each retry is read-wait-micro microseconds. -# When all retries are made, the rpc-client receives error 'Unable to acquire READ lock'. -max-read-wait-retries = 2 - -# Maximum microseconds for trying to get write lock on broadcast transaction. -write-wait-micro = 500000 - -# Maximum retries to get write lock. Each retry is write-wait-micro microseconds. -# When all retries are made, the rpc-client receives error 'Unable to acquire WRITE lock'. -max-write-wait-retries = 3 - -# Do all write operations (push_block/push_transaction) in the single thread. -# Write lock of database is very heavy. When many threads tries to lock database on writing, rpc-clients -# receive many errors 'Unable to acquire READ lock' ('Unable to acquire WRITE lock'). -# Enabling of this options can increase performance. -single-write-thread = true - -# Enable plugin notifications about operations in a pushed transaction, which should be included to the next generated -# block. Plugins doesn't validate data in operations, they only update its own indexes, so notifications can be -# disabled on push_transaction() without any side-effects. The option doesn't have effect on a pushing signed blocks, -# so it is safe. -# Disabling of this option can increase performance. -enable-plugins-on-push-transaction = true - -# A start size for shared memory file when it doesn't have any data. Possible cases: -# - If shared memory has data and the value is greater then the size of shared_memory.bin, -# the file will be grown to requested size. -# - If shared memory has data and the value is less then the size of shared_memory.bin, nothing happens. 
-# Changing of this parameter doesn't require the replaying. -shared-file-size = 100M - -# The minimum free space in the shared memory file. When free space reaches the following value, the size of the -# shared_memory.bin increases by the value of inc-shared-file-size. -min-free-shared-file-size = 50M - -# Step of increasing size of shared_memory.bin. When the free memory size reaches min-free-shared-file-size, -# the shared memory size increases by the following value. -inc-shared-file-size = 100M - -# How often do checking the free space in shared_memory.bin. A very frequent checking can decrease performance. -# It's not critical if the free size became very small, because the daemon catches the `bad_alloc` exception -# and resizes. The optimal strategy is do checking of the free space, but not very often. -block-num-check-free-size = 10 # each 30 seconds - -plugin = chain p2p json_rpc webserver network_broadcast_api validator test_api database_api private_message follow social_network tags market_history account_by_key account_history operation_history block_info raw_block debug_node validator_api mongo_db - -# For connect to mongodb which is running outside Docker (if vizd running inside) -mongodb-uri = mongodb://172.17.0.1:27017/viz - -# Remove votes before defined block, should increase performance -clear-votes-before-block = 0 # don't clear votes - -# Virtual operations will not be passed to the plugins, enabling of the option helps to save some memory. -skip-virtual-ops = false - -# Defines a range of accounts to track by the account_history plugin as a json pair ["from","to"] [from,to] -# track-account-range = - -# Defines a list of operations which will be explicitly logged by the account_history plugin. -# history-whitelist-ops = - -# Defines a list of operations which will be explicitly ignored by the account_history plugin. -# history-blacklist-ops = - -# Defines starting block from which recording stats by the account_history plugin. 
-# history-start-block = - -# Set the maximum size of cached feed for an account -follow-max-feed-size = 500 - -# Track market history by grouping orders into buckets of equal size measured in seconds specified as a JSON array of numbers -bucket-size = [15,60,300,3600,86400] - -# How far back in time to track history for each bucket size, measured in the number of buckets (default: 5760) -history-per-size = 5760 - -# Defines a range of accounts to private messages to/from as a json pair ["from","to"] [from,to) -# pm-account-range = - -# Enable block production, even if the chain is stale. -enable-stale-production = true - - -# Percent of validators (0-99) that must be participating in order to produce blocks -required-participation = 0 - -# name of validator controlled by this node (e.g. initwitness ) -validator = "viz" -# validator = "viz" # DEPRECATED: use 'validator' - -# WIF PRIVATE KEY to be used by one or more validators -private-key = 5JVFFWRLwz6JoP9kguuRFfytToGU6cLgBVTL9t6NB3D3BQLbUBS - -# declare an appender named "stderr" that writes messages to the console -[log.console_appender.stderr] -stream=std_error - -# declare an appender named "p2p" that writes messages to p2p.log -[log.file_appender.p2p] -filename=logs/p2p/p2p.log -# filename can be absolute or relative to this config file - -# route any messages logged to the default logger to the "stderr" logger we -# declared above, if they are info level are higher -[logger.default] -level=info -appenders=stderr - -# route messages sent to the "p2p" logger to stderr too -[logger.p2p] -level=info -appenders=stderr diff --git a/share/vizd/config/config_mongo.ini b/share/vizd/config/config_mongo.ini deleted file mode 100644 index a8ee8be8e6..0000000000 --- a/share/vizd/config/config_mongo.ini +++ /dev/null @@ -1,143 +0,0 @@ -# Endpoint for P2P node to listen on -p2p-endpoint = 0.0.0.0:4243 - -# Maxmimum number of incoming connections on P2P endpoint -# p2p-max-connections = - -# P2P nodes to connect to on 
startup (may specify multiple times) -# p2p-seed-node = - -# Enable stale sync detection: when no blocks are received for the configured timeout, -# the node resets sync from the last irreversible block and reconnects all seed peers. -# p2p-stale-sync-detection = false - -# Timeout in seconds before stale sync detection triggers recovery (default: 120 = 2 minutes). -# p2p-stale-sync-timeout-seconds = 120 - -# Pairs of [BLOCK_NUM,BLOCK_ID] that should be enforced as checkpoints. -# checkpoint = - -# Number of threads for rpc-clients. The optimal value is `-1` -webserver-thread-pool-size = 2 - -# IP:PORT for HTTP connections -webserver-http-endpoint = 0.0.0.0:8090 - -# IP:PORT for WebSocket connections -webserver-ws-endpoint = 0.0.0.0:8091 - -# Maximum microseconds for trying to get read lock -read-wait-micro = 500000 - -# Maximum retries to get read lock. Each retry is read-wait-micro microseconds. -# When all retries are made, the rpc-client receives error 'Unable to acquire READ lock'. -max-read-wait-retries = 2 - -# Maximum microseconds for trying to get write lock on broadcast transaction. -write-wait-micro = 500000 - -# Maximum retries to get write lock. Each retry is write-wait-micro microseconds. -# When all retries are made, the rpc-client receives error 'Unable to acquire WRITE lock'. -max-write-wait-retries = 3 - -# Do all write operations (push_block/push_transaction) in the single thread. -# Write lock of database is very heavy. When many threads tries to lock database on writing, rpc-clients -# receive many errors 'Unable to acquire READ lock' ('Unable to acquire WRITE lock'). -# Enabling of this options can increase performance. -single-write-thread = true - -# Enable plugin notifications about operations in a pushed transaction, which should be included to the next generated -# block. Plugins doesn't validate data in operations, they only update its own indexes, so notifications can be -# disabled on push_transaction() without any side-effects. 
The option doesn't have effect on a pushing signed blocks, -# so it is safe. -# Disabling of this option can increase performance. -enable-plugins-on-push-transaction = false - -# A start size for shared memory file when it doesn't have any data. Possible cases: -# - If shared memory has data and the value is greater then the size of shared_memory.bin, -# the file will be grown to requested size. -# - If shared memory has data and the value is less then the size of shared_memory.bin, nothing happens. -# Changing of this parameter doesn't require the replaying. -shared-file-size = 2G - -# The minimum free space in the shared memory file. When free space reaches the following value, the size of the -# shared_memory.bin increases by the value of inc-shared-file-size. -min-free-shared-file-size = 500M - -# Step of increasing size of shared_memory.bin. When the free memory size reaches min-free-shared-file-size, -# the shared memory size increases by the following value. -inc-shared-file-size = 2G - -# How often do checking the free space in shared_memory.bin. A very frequent checking can decrease performance. -# It's not critical if the free size became very small, because the daemon catches the `bad_alloc` exception -# and resizes. The optimal strategy is do checking of the free space, but not very often. -block-num-check-free-size = 1000 # each 3000 seconds - -plugin = chain p2p json_rpc webserver network_broadcast_api validator test_api database_api private_message follow social_network tags market_history account_by_key operation_history account_history block_info raw_block validator_api mongo_db - -# For connect to mongodb which is running outside Docker (if vizd running inside) -mongodb-uri = mongodb://172.17.0.1:27017/viz - -# Remove votes before defined block, should increase performance -clear-votes-before-block = 0 # clear votes after each cashout - -# Virtual operations will not be passed to the plugins, enabling of the option helps to save some memory. 
-skip-virtual-ops = false - -# Defines a range of accounts to track by the account_history plugin as a json pair ["from","to"] [from,to] -# track-account-range = - -# Defines a list of operations which will be explicitly logged by the account_history plugin. -# history-whitelist-ops = account_create_operation account_update_operation content_operation delete_content_operation vote_operation author_reward_operation curation_reward_operation transfer_operation transfer_to_vesting_operation withdraw_vesting_operation witness_update_operation account_witness_vote_operation account_witness_proxy_operation fill_vesting_withdraw_operation shutdown_witness_operation custom_json_operation request_account_recovery_operation recover_account_operation change_recovery_account_operation escrow_transfer_operation escrow_approve_operation escrow_dispute_operation escrow_release_operation content_benefactor_reward_operation - -# Defines a list of operations which will be explicitly ignored by the account_history plugin. -# history-blacklist-ops = - -# Defines starting block from which recording stats by the account_history plugin. -# history-start-block = 0 - -# Set the maximum size of cached feed for an account -follow-max-feed-size = 500 - -# Track market history by grouping orders into buckets of equal size measured in seconds specified as a JSON array of numbers -bucket-size = [15,60,300,3600,86400] - -# How far back in time to track history for each bucket size, measured in the number of buckets (default: 5760) -history-per-size = 5760 - -# Defines a range of accounts to private messages to/from as a json pair ["from","to"] [from,to) -# pm-account-range = - -# Enable block production, even if the chain is stale. -enable-stale-production = false - - -# Percent of validators (0-99) that must be participating in order to produce blocks -required-participation = 0 - -# name of validator controlled by this node (e.g. 
initwitness ) -# validator = -# # validator = # DEPRECATED: use 'validator' - -# WIF PRIVATE KEY to be used by one or more validators -# private-key = - -# declare an appender named "stderr" that writes messages to the console -[log.console_appender.stderr] -stream=std_error - -# declare an appender named "p2p" that writes messages to p2p.log -[log.file_appender.p2p] -filename=logs/p2p/p2p.log -# filename can be absolute or relative to this config file - -# route any messages logged to the default logger to the "stderr" logger we -# declared above, if they are info level are higher -[logger.default] -level=debug -appenders=stderr - -# route messages sent to the "p2p" logger to stderr too -[logger.p2p] -level=error -appenders=stderr diff --git a/share/vizd/config/config_stock_exchange.ini b/share/vizd/config/config_stock_exchange.ini index 3a89b81b94..48a413df51 100644 --- a/share/vizd/config/config_stock_exchange.ini +++ b/share/vizd/config/config_stock_exchange.ini @@ -73,7 +73,7 @@ inc-shared-file-size = 2G # and resizes. The optimal strategy is do checking of the free space, but not very often. block-num-check-free-size = 1000 # each 3000 seconds -plugin = chain p2p json_rpc webserver network_broadcast_api validator database_api block_info raw_block operation_history account_history witness_api +plugin = chain p2p json_rpc webserver network_broadcast_api validator database_api block_info raw_block operation_history account_history # Remove votes before defined block, should increase performance clear-votes-before-block = 0 # clear votes after each cashout diff --git a/share/vizd/config/config_testnet.ini b/share/vizd/config/config_testnet.ini index 7281d042b8..83e1ebaa6d 100644 --- a/share/vizd/config/config_testnet.ini +++ b/share/vizd/config/config_testnet.ini @@ -73,7 +73,7 @@ inc-shared-file-size = 2G # and resizes. The optimal strategy is do checking of the free space, but not very often. 
block-num-check-free-size = 1000 # each 3000 seconds -plugin = validator witness_api +plugin = validator plugin = chain p2p json_rpc webserver network_broadcast_api database_api plugin = account_history operation_history plugin = committee_api invite_api paid_subscription_api custom_protocol_api @@ -97,12 +97,6 @@ skip-virtual-ops = false # Defines starting block from which recording stats by the account_history plugin. # history-start-block = 0 -# Set the maximum size of cached feed for an account -follow-max-feed-size = 500 - -# Defines a range of accounts to private messages to/from as a json pair ["from","to"] [from,to) -# pm-account-range = - # Enable block production, even if the chain is stale. enable-stale-production = true diff --git a/share/vizd/docker/Dockerfile-production b/share/vizd/docker/Dockerfile-production index d32d7eec23..9eaf7869e9 100644 --- a/share/vizd/docker/Dockerfile-production +++ b/share/vizd/docker/Dockerfile-production @@ -131,6 +131,25 @@ COPY share/vizd/vizd.sh /etc/service/vizd/run COPY share/vizd/snapshot.json /var/lib/vizd COPY share/vizd/config/config.ini /etc/vizd/config.ini +# Log rotation — IMPORTANT for production deployments. +# +# vizd writes all output to stdout/stderr. Docker's default json-file log driver +# has no size limit, so a log flood (crash loop, assertion storm, etc.) can fill +# the host disk in minutes. Use the `local` driver, which is more space-efficient +# and supports automatic rotation. 
+# +# Recommended: set globally on the Docker host so every container is protected: +# +# /etc/docker/daemon.json: +# { +# "log-driver": "local", +# "log-opts": { "max-size": "100m", "max-file": "5" } +# } +# then: systemctl restart docker (existing containers keep their old log settings until recreated) +# +# Per-container override (docker run): +# --log-driver=local --log-opt max-size=100m --log-opt max-file=5 +# # rpc services: # http EXPOSE 8090 diff --git a/share/vizd/docker/Dockerfile-testnet b/share/vizd/docker/Dockerfile-testnet index f4ba3cc300..a4f93f4192 100644 --- a/share/vizd/docker/Dockerfile-testnet +++ b/share/vizd/docker/Dockerfile-testnet @@ -132,6 +132,25 @@ COPY share/vizd/vizd.sh /etc/service/vizd/run COPY share/vizd/snapshot-testnet.json /var/lib/vizd/snapshot.json COPY share/vizd/config/config_testnet.ini /etc/vizd/config.ini +# Log rotation — IMPORTANT for production deployments. +# +# vizd writes all output to stdout/stderr. Docker's default json-file log driver +# has no size limit, so a log flood (crash loop, assertion storm, etc.) can fill +# the host disk in minutes. Use the `local` driver, which is more space-efficient +# and supports automatic rotation. +# +# Recommended: set globally on the Docker host so every container is protected: +# +# /etc/docker/daemon.json: +# { +# "log-driver": "local", +# "log-opts": { "max-size": "100m", "max-file": "5" } +# } +# then: systemctl restart docker (existing containers keep their old log settings until recreated) +# +# Per-container override (docker run): +# --log-driver=local --log-opt max-size=100m --log-opt max-file=5 +# # rpc services: # http EXPOSE 8090 diff --git a/thirdparty/chainbase b/thirdparty/chainbase index 3d02090982..a05eee3e83 160000 --- a/thirdparty/chainbase +++ b/thirdparty/chainbase @@ -1 +1 @@ -Subproject commit 3d02090982d7df8ea2b796d58964ec430c26b506 +Subproject commit a05eee3e83d3e5e39b9c5a126b01d8f8d7d3350b