Skip to content

Commit a24ea8c

Browse files
committed
CCBC-1634: Fix reporting unresponsive nodes in lcb_ping()
* do not retry NOOP commands, as they might be routed to different pipeline, instead fail fast NOOPs to reflect network issues more precisely. * use pipeline address as ping entry identifier instead of socket address, as socket might not be existing (not connected) due to network failures. * lcb_ping still have report even when overall status is not LCB_SUCCESS, so cbc-ping should still try to print report instead just printing overall status code. Change-Id: I49cc78022e26c25d4140ad6fc6d287e4e6cb0c39 Reviewed-on: https://review.couchbase.org/c/libcouchbase/+/206208 Tested-by: Build Bot <build@couchbase.com> Reviewed-by: Brett Lawson <brett19@gmail.com>
1 parent 9a7498b commit a24ea8c

3 files changed

Lines changed: 18 additions & 8 deletions

File tree

src/operations/ping.cc

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,11 @@ static void handle_ping(mc_PIPELINE *pipeline, mc_PACKET *req, lcb_CALLBACK_TYPE
418418

419419
if (err != LCB_ERR_REQUEST_CANCELED && ck->needMetrics()) {
420420
lcb_PINGSVC svc = {};
421+
{
422+
char id[20] = {0};
423+
snprintf(id, sizeof(id), "%p", (void *)pipeline);
424+
svc.id = lcb_strdup(id);
425+
}
421426
if (server->has_valid_host()) {
422427
const lcb_host_t &remote = server->get_host();
423428
std::string hh;
@@ -444,10 +449,7 @@ static void handle_ping(mc_PIPELINE *pipeline, mc_PACKET *req, lcb_CALLBACK_TYPE
444449
}
445450
lcbio_CTX *ctx = server->connctx;
446451
if (ctx) {
447-
char id[20] = {0};
448452
svc.local = lcb_strdup(ctx->sock->info->ep_local_host_and_port);
449-
snprintf(id, sizeof(id), "%p", (void *)ctx->sock);
450-
svc.id = lcb_strdup(id);
451453
}
452454
svc.scope = server->get_instance()->get_bucketname();
453455

src/retrychk.cc

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,16 +72,22 @@ static int mc_is_idempotent(uint8_t opcode)
7272

7373
lcb_RETRY_ACTION lcb_kv_should_retry(const lcb_settings *settings, const mc_PACKET *pkt, lcb_STATUS err)
7474
{
75+
lcb_RETRY_ACTION retry_action{};
7576
protocol_binary_request_header hdr;
7677

7778
mcreq_read_hdr(pkt, &hdr);
7879

80+
if (hdr.request.opcode == PROTOCOL_BINARY_CMD_NOOP) {
81+
// do not retry NOOP, it should fast fail to capture connection errors
82+
retry_action.should_retry = 0;
83+
return retry_action;
84+
}
85+
7986
lcb_RETRY_REASON retry_reason = mc_code_to_reason(err);
8087
lcb_RETRY_REQUEST retry_req;
8188
retry_req.operation_cookie = const_cast<void *>(MCREQ_PKT_COOKIE(pkt));
8289
retry_req.is_idempotent = mc_is_idempotent(hdr.request.opcode);
8390
retry_req.retry_attempts = pkt->retries;
84-
lcb_RETRY_ACTION retry_action{};
8591

8692
if (err == LCB_ERR_AUTHENTICATION_FAILURE || err == LCB_ERR_TOPOLOGY_CHANGE || err == LCB_ERR_BUCKET_NOT_FOUND) {
8793
/* spurious auth error */

tools/cbc.cc

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -457,8 +457,9 @@ static void ping_callback(lcb_INSTANCE *, int, const lcb_RESPPING *resp)
457457
{
458458
lcb_STATUS rc = lcb_respping_status(resp);
459459
if (rc != LCB_SUCCESS) {
460-
fprintf(stderr, "failed: %s\n", lcb_strerror_short(rc));
461-
} else {
460+
fprintf(stderr, "Overall status: %s\n", lcb_strerror_short(rc));
461+
}
462+
{
462463
const char *json;
463464
size_t njson;
464465
lcb_respping_value(resp, &json, &njson);
@@ -494,8 +495,9 @@ static void ping_table_callback(lcb_INSTANCE *, int, const lcb_RESPPING *resp)
494495
{
495496
lcb_STATUS rc = lcb_respping_status(resp);
496497
if (rc != LCB_SUCCESS) {
497-
fprintf(stderr, "failed: %s\n", lcb_strerror_short(rc));
498-
} else {
498+
fprintf(stderr, "Overall status: %s\n", lcb_strerror_short(rc));
499+
}
500+
{
499501
const char *json;
500502
size_t njson = 0;
501503
lcb_respping_value(resp, &json, &njson);

0 commit comments

Comments
 (0)