diff --git a/R/data.table.R b/R/data.table.R
index a3ea551fa..648271f8e 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -2893,6 +2893,35 @@ setattr = function(x,name,value) {
   invisible(x)
 }
 
+# Applies option 'datatable.unique.names' to names_vec, the complete vector of column names as it would be after a change.
+# idx (optional) holds the positions within names_vec that are being assigned. It is only used by policy "rename", where those
+#   positions are the ones that take the make.unique() suffixes; idx=NULL gives plain make.unique(names_vec).
+# Returns names_vec, altered only under "rename". Policy "warn" warns and "error" stops when names_vec contains duplicates.
+process_name_policy = function(names_vec, idx=NULL) {
+  policy = getOption("datatable.unique.names", "off")
+  allowed = c("off", "warn", "error", "rename")
+  # validate before comparing: an NA, non-character or length!=1 value would make the if() conditions below error
+  if (!is.character(policy) || length(policy) != 1L || !policy %in% allowed) {
+    warningf("Invalid value for 'datatable.unique.names': [%s]. Falling back to 'off'. Allowed values are: 'off', 'warn', 'error', 'rename'.", toString(policy))
+    return(names_vec)
+  }
+  if (policy == "off" || !anyDuplicated(names_vec)) return(names_vec)
+
+  dups = unique(names_vec[duplicated(names_vec)])
+  msg = paste0("Duplicate column names created: ", brackify(dups), ". This may cause ambiguity.")
+  switch(policy,
+    warn = warningf("%s", msg), # msg goes through "%s" so that a '%' inside a column name is not read as a format
+    error = stopf("%s", msg),
+    rename = {
+      # make.unique() keeps first occurrences and suffixes later ones, so order the positions not in idx first
+      idx = unique(seq_along(names_vec)[idx])
+      ord = c(setdiff(seq_along(names_vec), idx), idx)
+      names_vec[ord] = make.unique(names_vec[ord])
+    }
+  )
+  names_vec
+}
+
 setnames = function(x,old,new,skip_absent=FALSE) {
   # Sets by reference, maintains truelength, no copy of table at all.
   # But also more convenient than names(DT)[i]="newname" because we can also do setnames(DT,"oldname","newname")
@@ -2950,6 +2979,13 @@ setnames = function(x,old,new,skip_absent=FALSE) {
     if (!length(new)) return(invisible(x)) # no changes
     if (length(i) != length(new)) internal_error("length(i)!=length(new)") # nocov
   }
+
+  # apply option datatable.unique.names to the names as they would be after this call; i is passed so "rename" suffixes land on new
+  full_names = names(x)
+  full_names[i] = new
+  full_names = process_name_policy(full_names, i)
+  new = full_names[i]
+
   # update the key if the column name being change is in the key
   m = chmatch(names(x)[i], key(x))
   w = which(!is.na(m))
diff --git a/R/onLoad.R b/R/onLoad.R
index b72fee4d1..ba404305b 100644
--- a/R/onLoad.R
+++ b/R/onLoad.R
@@ -98,7 +98,8 @@
     datatable.auto.index=TRUE, # DT[col=="val"] to auto add index so 2nd time faster
     datatable.use.index=TRUE, # global switch to address #1422
     datatable.prettyprint.char=NULL, # FR #1091
-    datatable.old.matrix.autoname=FALSE # #7145: how data.table(x=1, matrix(1)) is auto-named set to change
+    datatable.old.matrix.autoname=FALSE, # #7145: how data.table(x=1, matrix(1)) is auto-named set to change
+    datatable.unique.names="off" # PR #7674: setnames() policy for duplicate column names: "off", "warn", "error" or "rename"
   )
   opts = opts[!names(opts) %chin% names(options())]
   options(opts)
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 687bf929e..5f3767627 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -21660,3 +21660,15 @@ test(2374.08, key(DT[, .(a, a)]), NULL)
 test(2374.09, key(subset(DT, select=c(a, a))), NULL)
 DT = data.table(a=1:2, a.1=3:4, val=10:11)
 test(2374.10, key(DT[, .(a.1, sum(val)), keyby=.(a, a)]), NULL)
+
+# PR #7674: Add datatable.unique.names option to control duplicate names in setnames()
+DT = data.table(a=1, b=2)
+test(2375.1, names(setnames(copy(DT), "b", "a")), c("a", "a"), options = list(datatable.unique.names = "off"))
+test(2375.2, names(setnames(copy(DT), "b", "a")), c("a", "a"), warning = "Duplicate column names created", options = list(datatable.unique.names = "warn"))
+test(2375.3, setnames(copy(DT), "b", "a"), error = "Duplicate column names created", options = list(datatable.unique.names = "error"))
+test(2375.4, names(setnames(copy(DT), "b", "a")), c("a", "a.1"), options = list(datatable.unique.names = "rename"))
+DT_pct = data.table("a%d"=1, b=2)
+test(2375.5, names(setnames(copy(DT_pct), "b", "a%d")), c("a%d", "a%d"), warning = "Duplicate column names created", options = list(datatable.unique.names = "warn"))
+test(2375.6, names(setnames(copy(DT), "b", "a")), c("a", "a"), warning = "Invalid value for 'datatable.unique.names'", options = list(datatable.unique.names = "wrong_choice"))
+test(2375.7, names(setnames(copy(DT), "a", "b")), c("b.1", "b"), options = list(datatable.unique.names = "rename")) # renamed column precedes the existing one
+test(2375.8, names(setnames(copy(DT), "b", "a")), c("a", "a"), warning = "Invalid value for 'datatable.unique.names'", options = list(datatable.unique.names = NA_character_))
diff --git a/man/data.table-options.Rd b/man/data.table-options.Rd
index 439e88ef2..a7d0c7084 100644
--- a/man/data.table-options.Rd
+++ b/man/data.table-options.Rd
@@ -105,6 +105,16 @@
       \item{\code{datatable.enlist}}{Experimental feature. Default is \code{NULL}. If set to a function
         (e.g., \code{list}), the \code{j} expression can return a \code{list}, which will then
         be "enlisted" into columns in the result.}
+      \item{\code{datatable.unique.names}}{A character string, default \code{"off"}.
+        Controls what \code{\link{setnames}} does when the requested change would leave the table with duplicate column names.
+        Any value other than those listed below triggers a warning and is treated as \code{"off"}.
+        Possible values:
+        \itemize{
+          \item \code{"off"}: (default) Duplicates are allowed silently.
+          \item \code{"warn"}: The change is made and a warning is issued.
+          \item \code{"error"}: An error is raised, preventing the change.
+          \item \code{"rename"}: The clashing new names are made unique via \code{\link[base]{make.unique}}; columns that are not being renamed keep their names.
+        }}
     }
 }
 