Skip to content

Commit 245901f

Browse files
authored
fix(covenantsigner): apply audit findings from #3935 rebased onto current base (#3940)
## Problem PR #3935 was merged into `fix/review-findings`, an intermediate base branch that was never integrated into `feat/psbt-covenant-final-project-pr`. This left `feat/psbt-covenant-final-project-pr` without the audit-finding fixes that #3935 implemented (createOrDedup mutex extraction, healthz auth bypass restriction, canonical JSON for handoff payload, error message improvements, gosec hardening, and more). ## Solution This PR cherry-picks the squash commit of #3935 onto the current state of `feat/psbt-covenant-final-project-pr`, which already contains PR #3933 (Maclane's fault-isolation work plus his subsequent fixes for the review comments raised on that PR). Three files required manual conflict resolution where #3935 and #3933 made overlapping changes addressing the same concerns. In `pkg/tbtc/signer_approval_certificate.go`, the `ErrMissingWalletID` and `ErrMissingMembersIDsHash` sentinel errors from #3935 were dropped because Maclane's `ensureWalletRegistryDataAvailable` function already covers the same case and is in active use; keeping both would have left the sentinels as dead code. In `pkg/covenantsigner/server_test.go`, the test rewrite from #3935 (which depended on a `serviceCtx` parameter to `newHandler`) was reverted in favor of the existing middleware-based test, which already exercises the `context.WithoutCancel(r.Context())` detachment path. In `pkg/covenantsigner/store_test.go`, the test rename collision was resolved by taking #3935's name (`TestStoreLoadResolvesInvalidUpdatedAtForDuplicateRouteKeys`). Three additional cleanups followed from the merge mechanics: the godoc on `WalletChainData.MembersIDsHash` was updated to reference `ensureWalletRegistryDataAvailable` instead of the dropped sentinels, an orphaned `cancelService()` call was removed from `server.go` (its `serviceCtx` infrastructure did not auto-merge), and a duplicate `delete(s.byRequestID, existingID)` was deduplicated in `store.go`'s load path. ## Tests All tests across `pkg/covenantsigner`, `pkg/tbtc`, and `pkg/chain/ethereum` pass. A behavioral diff against the base branch shows identical build, vet, and file-listing outputs, with all `ok` lines matching except for timing variation. The two adapter tests added by Maclane (`TestMakeWalletChainData...`) and the two registry-unavailable tests for the issue-and-verify signer approval paths are preserved and pass.
2 parents b527032 + 8a2295a commit 245901f

8 files changed

Lines changed: 146 additions & 66 deletions

File tree

pkg/chain/ethereum/tbtc.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1478,8 +1478,10 @@ func (tc *TbtcChain) GetWallet(
14781478

14791479
walletRegistryWallet, err := tc.walletRegistry.GetWallet(wallet.EcdsaWalletID)
14801480
if err != nil {
1481-
logger.Warnf(
1482-
"cannot get wallet registry data for wallet [0x%x]: [%v]",
1481+
logger.Errorf(
1482+
"wallet registry unavailable for wallet [0x%x]; "+
1483+
"MembersIDsHash will be zero -- signer approval "+
1484+
"operations will fail until the registry recovers: [%v]",
14831485
wallet.EcdsaWalletID,
14841486
err,
14851487
)

pkg/covenantsigner/config.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@ type Config struct {
1010
// binds to. Empty defaults to loopback-only.
1111
ListenAddress string
1212
// AuthToken enables static Bearer authentication for signer endpoints.
13-
// Non-loopback binds must set this.
13+
// Non-loopback binds must set this. Prefer environment variables or
14+
// config files over CLI flags to avoid exposing the token in
15+
// /proc/PID/cmdline.
1416
AuthToken string
1517
// EnableSelfV1 exposes the self_v1 signer HTTP routes. Keep this disabled
1618
// for a qc_v1-first launch unless self_v1 has cleared its own go-live gate.

pkg/covenantsigner/server.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ func Initialize(
120120
return nil, false, fmt.Errorf("failed to bind covenant signer port [%d]: %w", config.Port, err)
121121
}
122122

123-
go func() {
123+
go func() { // #nosec G118 -- parent ctx is already cancelled; shutdown needs a fresh deadline
124124
<-ctx.Done()
125125
shutdownCtx, cancelShutdown := context.WithTimeout(
126126
context.WithoutCancel(ctx),
@@ -239,7 +239,7 @@ func newHandler(service *Service, authToken string, enableSelfV1 bool) http.Hand
239239
}
240240

241241
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
242-
if r.URL.Path == "/healthz" {
242+
if r.Method == http.MethodGet && r.URL.Path == "/healthz" {
243243
mux.ServeHTTP(w, r)
244244
return
245245
}

pkg/covenantsigner/service.go

Lines changed: 63 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"crypto/rand"
66
"encoding/hex"
7+
"errors"
78
"fmt"
89
"reflect"
910
"sync"
@@ -229,50 +230,28 @@ func (s *Service) loadPollJob(route TemplateID, input SignerPollInput) (*Job, er
229230
return job, nil
230231
}
231232

232-
func (s *Service) Submit(ctx context.Context, route TemplateID, input SignerSubmitInput) (StepResult, error) {
233-
submitValidationOptions := validationOptions{
234-
migrationPlanQuoteTrustRoots: s.migrationPlanQuoteTrustRoots,
235-
depositorTrustRoots: s.depositorTrustRoots,
236-
custodianTrustRoots: s.custodianTrustRoots,
237-
requireFreshMigrationPlanQuote: true,
238-
migrationPlanQuoteVerificationNow: s.now(),
239-
signerApprovalVerifier: s.signerApprovalVerifier,
240-
}
241-
if err := validateSubmitInput(route, input, submitValidationOptions); err != nil {
242-
return StepResult{}, err
243-
}
244-
245-
normalizedRequest, err := normalizeRouteSubmitRequest(
246-
input.Request,
247-
validationOptions{
248-
migrationPlanQuoteTrustRoots: s.migrationPlanQuoteTrustRoots,
249-
depositorTrustRoots: s.depositorTrustRoots,
250-
custodianTrustRoots: s.custodianTrustRoots,
251-
signerApprovalVerifier: s.signerApprovalVerifier,
252-
},
253-
)
254-
if err != nil {
255-
return StepResult{}, err
256-
}
257-
258-
requestDigest, err := requestDigestFromNormalized(normalizedRequest)
259-
if err != nil {
260-
return StepResult{}, err
261-
}
262-
233+
// createOrDedup creates a new job under the service mutex, or returns the
234+
// existing job result if the route request is already known. Returns
235+
// (job, nil, nil) for a new job, or (nil, result, nil) for a dedup hit.
236+
func (s *Service) createOrDedup(
237+
route TemplateID,
238+
input SignerSubmitInput,
239+
normalizedRequest RouteSubmitRequest,
240+
requestDigest string,
241+
) (*Job, *StepResult, error) {
263242
s.mutex.Lock()
243+
defer s.mutex.Unlock()
244+
264245
if existing, ok, err := s.store.GetByRouteRequest(route, input.RouteRequestID); err != nil {
265-
s.mutex.Unlock()
266-
return StepResult{}, err
246+
return nil, nil, err
267247
} else if ok {
268248
if existing.RequestDigest != requestDigest {
269-
s.mutex.Unlock()
270-
return StepResult{}, &inputError{
249+
return nil, nil, &inputError{
271250
"routeRequestId already exists with a different request payload",
272251
}
273252
}
274-
s.mutex.Unlock()
275-
return mapJobResult(existing), nil
253+
result := mapJobResult(existing)
254+
return nil, &result, nil
276255
}
277256

278257
requestIDPrefix := ""
@@ -282,14 +261,12 @@ func (s *Service) Submit(ctx context.Context, route TemplateID, input SignerSubm
282261
case TemplateSelfV1:
283262
requestIDPrefix = "kcs_self"
284263
default:
285-
s.mutex.Unlock()
286-
return StepResult{}, fmt.Errorf("unsupported route: %s", route)
264+
return nil, nil, fmt.Errorf("unsupported route: %s", route)
287265
}
288266

289267
requestID, err := newRequestID(requestIDPrefix)
290268
if err != nil {
291-
s.mutex.Unlock()
292-
return StepResult{}, err
269+
return nil, nil, err
293270
}
294271

295272
now := s.now()
@@ -309,10 +286,50 @@ func (s *Service) Submit(ctx context.Context, route TemplateID, input SignerSubm
309286
}
310287

311288
if err := s.store.Put(job); err != nil {
312-
s.mutex.Unlock()
289+
return nil, nil, err
290+
}
291+
292+
return job, nil, nil
293+
}
294+
295+
func (s *Service) Submit(ctx context.Context, route TemplateID, input SignerSubmitInput) (StepResult, error) {
296+
submitValidationOptions := validationOptions{
297+
migrationPlanQuoteTrustRoots: s.migrationPlanQuoteTrustRoots,
298+
depositorTrustRoots: s.depositorTrustRoots,
299+
custodianTrustRoots: s.custodianTrustRoots,
300+
requireFreshMigrationPlanQuote: true,
301+
migrationPlanQuoteVerificationNow: s.now(),
302+
signerApprovalVerifier: s.signerApprovalVerifier,
303+
}
304+
if err := validateSubmitInput(route, input, submitValidationOptions); err != nil {
313305
return StepResult{}, err
314306
}
315-
s.mutex.Unlock()
307+
308+
normalizedRequest, err := normalizeRouteSubmitRequest(
309+
input.Request,
310+
validationOptions{
311+
migrationPlanQuoteTrustRoots: s.migrationPlanQuoteTrustRoots,
312+
depositorTrustRoots: s.depositorTrustRoots,
313+
custodianTrustRoots: s.custodianTrustRoots,
314+
signerApprovalVerifier: s.signerApprovalVerifier,
315+
},
316+
)
317+
if err != nil {
318+
return StepResult{}, err
319+
}
320+
321+
requestDigest, err := requestDigestFromNormalized(normalizedRequest)
322+
if err != nil {
323+
return StepResult{}, err
324+
}
325+
326+
job, existingResult, err := s.createOrDedup(route, input, normalizedRequest, requestDigest)
327+
if err != nil {
328+
return StepResult{}, err
329+
}
330+
if existingResult != nil {
331+
return *existingResult, nil
332+
}
316333

317334
transition, err := s.engine.OnSubmit(ctx, job)
318335
if err != nil {
@@ -329,7 +346,7 @@ func (s *Service) Submit(ctx context.Context, route TemplateID, input SignerSubm
329346
s.mutex.Lock()
330347
defer s.mutex.Unlock()
331348

332-
currentJob, ok, err := s.store.GetByRequestID(requestID)
349+
currentJob, ok, err := s.store.GetByRequestID(job.RequestID)
333350
if err != nil {
334351
return StepResult{}, err
335352
}
@@ -378,7 +395,7 @@ func (s *Service) Poll(ctx context.Context, route TemplateID, input SignerPollIn
378395

379396
transition, pollErr := s.engine.OnPoll(ctx, job)
380397
if pollErr != nil {
381-
if pollErr != errJobNotFound {
398+
if !errors.Is(pollErr, errJobNotFound) {
382399
return StepResult{}, pollErr
383400
}
384401
}
@@ -398,7 +415,7 @@ func (s *Service) Poll(ctx context.Context, route TemplateID, input SignerPollIn
398415
return mapJobResult(currentJob), nil
399416
}
400417

401-
if pollErr == errJobNotFound {
418+
if errors.Is(pollErr, errJobNotFound) {
402419
applyTransition(currentJob, &Transition{
403420
State: JobStateFailed,
404421
Reason: ReasonJobNotFound,

pkg/covenantsigner/store.go

Lines changed: 58 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,12 @@ func NewStore(handle persistence.BasicHandle, dataDir string) (*Store, error) {
5555
// acquireFileLock creates and acquires an exclusive non-blocking advisory lock
5656
// on a lock file inside the jobs directory. The returned file handle must be
5757
// kept open for the lifetime of the lock; closing it releases the lock.
58+
//
59+
// IMPORTANT: This uses POSIX flock(2), which is advisory and Linux-specific.
60+
// It protects against concurrent processes on the same host but does NOT
61+
// protect against concurrent access over network filesystems (NFS, EFS,
62+
// CIFS). The data directory MUST reside on local or block-level storage
63+
// with single-writer access (e.g., Kubernetes ReadWriteOnce PV).
5864
func acquireFileLock(dataDir string) (*os.File, error) {
5965
lockPath := filepath.Join(dataDir, jobsDirectory, lockFileName)
6066

@@ -155,6 +161,8 @@ func (s *Store) load() error {
155161

156162
dataChan, errorChan := s.handle.ReadAll()
157163

164+
var loaded int
165+
158166
for dataChan != nil || errorChan != nil {
159167
select {
160168
case descriptor, ok := <-dataChan:
@@ -196,35 +204,69 @@ func (s *Store) load() error {
196204
// candidate's timestamp is valid, the failure is on
197205
// the existing job -- replace it. Otherwise skip the
198206
// candidate.
199-
if _, parseErr := time.Parse(time.RFC3339Nano, job.UpdatedAt); parseErr != nil {
207+
_, existingParseErr := time.Parse(time.RFC3339Nano, existing.UpdatedAt)
208+
_, candidateParseErr := time.Parse(time.RFC3339Nano, job.UpdatedAt)
209+
210+
switch {
211+
case candidateParseErr != nil && existingParseErr == nil:
212+
// Only the candidate is unparseable; keep existing.
200213
logger.Warnf(
201-
"skipping job [%s] with invalid timestamp on duplicate route key [%s/%s]: [%v]",
214+
"skipping job [%s] with invalid timestamp on duplicate route key [%s/%s] (keeping [%s]): [%v]",
202215
job.RequestID,
203216
job.Route,
204217
job.RouteRequestID,
218+
existing.RequestID,
205219
err,
206220
)
207221
continue
222+
case candidateParseErr == nil && existingParseErr != nil:
223+
// Only the existing is unparseable; replace with candidate.
224+
logger.Warnf(
225+
"replacing job [%s] with invalid timestamp on duplicate route key [%s/%s]: [%v]",
226+
existing.RequestID,
227+
job.Route,
228+
job.RouteRequestID,
229+
err,
230+
)
231+
default:
232+
// Both timestamps are unparseable. Use
233+
// lexicographic RequestID as a deterministic
234+
// tiebreaker so the outcome does not depend
235+
// on file iteration order.
236+
if existing.RequestID <= job.RequestID {
237+
logger.Warnf(
238+
"skipping job [%s] on duplicate route key [%s/%s] (keeping [%s], lexicographic tiebreak): [%v]",
239+
job.RequestID,
240+
job.Route,
241+
job.RouteRequestID,
242+
existing.RequestID,
243+
err,
244+
)
245+
continue
246+
}
247+
logger.Warnf(
248+
"replacing job [%s] on duplicate route key [%s/%s] (lexicographic tiebreak): [%v]",
249+
existing.RequestID,
250+
job.Route,
251+
job.RouteRequestID,
252+
err,
253+
)
208254
}
209-
logger.Warnf(
210-
"replacing job [%s] with invalid timestamp on duplicate route key [%s/%s]: [%v]",
211-
existing.RequestID,
212-
job.Route,
213-
job.RouteRequestID,
214-
err,
215-
)
216255
} else if existingIsNewerOrSame {
217256
continue
218257
}
219258
}
220259

260+
// Remove the superseded job from the primary index so stale
261+
// entries do not leak in byRequestID.
221262
if existingID != job.RequestID {
222263
delete(s.byRequestID, existingID)
223264
}
224265
}
225266

226267
s.byRequestID[job.RequestID] = job
227268
s.byRouteKey[key] = job.RequestID
269+
loaded++
228270
case err, ok := <-errorChan:
229271
if !ok {
230272
errorChan = nil
@@ -236,6 +278,10 @@ func (s *Store) load() error {
236278
}
237279
}
238280

281+
if loaded > 0 {
282+
logger.Infof("store load complete: loaded [%d] jobs", loaded)
283+
}
284+
239285
return nil
240286
}
241287

@@ -260,7 +306,9 @@ func (s *Store) GetByRouteRequest(route TemplateID, routeRequestID string) (*Job
260306
s.mutex.Lock()
261307
defer s.mutex.Unlock()
262308

263-
requestID, ok := s.byRouteKey[routeKey(route, routeRequestID)]
309+
key := routeKey(route, routeRequestID)
310+
311+
requestID, ok := s.byRouteKey[key]
264312
if !ok {
265313
return nil, false, nil
266314
}

pkg/covenantsigner/store_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ func TestStoreLoadSelectsNewestJobForDuplicateRouteKeys(t *testing.T) {
211211
}
212212
}
213213

214-
func TestStoreLoadKeepsBestAvailableJobWhenDuplicateUpdatedAtInvalid(t *testing.T) {
214+
func TestStoreLoadResolvesInvalidUpdatedAtForDuplicateRouteKeys(t *testing.T) {
215215
handle := newMemoryHandle()
216216

217217
first := &Job{

pkg/tbtc/chain.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,12 @@ type DepositChainRequest struct {
415415
}
416416

417417
// WalletChainData represents wallet data stored on-chain.
418+
//
419+
// EcdsaWalletID and MembersIDsHash are sourced from the wallet registry.
420+
// When the registry is unavailable during a fault-isolated GetWallet call,
421+
// these fields contain their zero values. Consumers that require registry
422+
// data (e.g. signer approval certificate computation) must guard against
423+
// zero values via ensureWalletRegistryDataAvailable.
418424
type WalletChainData struct {
419425
EcdsaWalletID [32]byte
420426
// MembersIDsHash is populated from the wallet registry rather than the

pkg/tbtc/covenant_signer.go

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"github.com/btcsuite/btcd/txscript"
1717
"github.com/keep-network/keep-core/pkg/bitcoin"
1818
"github.com/keep-network/keep-core/pkg/covenantsigner"
19+
"github.com/keep-network/keep-core/pkg/internal/canonicaljson"
1920
"github.com/keep-network/keep-core/pkg/tecdsa"
2021
)
2122

@@ -870,10 +871,14 @@ func buildWitnessSignatureBytes(signature *tecdsa.Signature) ([]byte, error) {
870871
}
871872

872873
func computeQcV1SignerHandoffPayloadHash(payload map[string]any) (string, error) {
873-
// The handoff bundle ID is content-addressed using Go's stable JSON map-key
874-
// ordering. Future non-Go custodian consumers that want to recompute this
875-
// hash must preserve the same canonical field set and serialization rules.
876-
rawPayload, err := json.Marshal(payload)
874+
// The handoff bundle ID is content-addressed using canonical JSON
875+
// (alphabetical key ordering, no HTML escaping, no trailing newline).
876+
// Go's encoding/json.Marshal already sorts map keys alphabetically
877+
// (since Go 1.12), so using canonicaljson.Marshal produces identical
878+
// output for non-HTML content while also disabling HTML escaping for
879+
// safety. Non-Go custodian consumers that recompute this hash must
880+
// use the same canonical serialization rules.
881+
rawPayload, err := canonicaljson.Marshal(payload)
877882
if err != nil {
878883
return "", err
879884
}

0 commit comments

Comments
 (0)