I have a dataset, input_ds_wt
that contains values for RSE_VAR
and wt.mean_VAR
for many variables, VAR.
input_ds_wt = structure(list(id = c(1, 2, 3, 4, 5, 6), wt.mean_v1 = c(1, 1,
1.3, 2.3, 1, 0), wt.mean_v2 = c(0.8, 0.2, 0.8, 0.2, 0.8, 0.2),
wt.SE_v1 = c(0.1, 0.01, 0.2, 0.02, 0.3, 0.03), wt.SE_v2 = c(0.03,
0.3, 0.01, 0.1, 0.4, 0.04), RSE_v1 = c(0.1, 0.01, 0.153846153846154,
0.00869565217391304, 0.3, Inf), RSE_v2 = c(0.0375, 1.5, 0.0125,
0.5, 0.5, 0.2)), class = "data.frame", row.names = c(NA,
-6L))
gives
input_ds_wt
id wt.mean_v1 wt.mean_v2 wt.SE_v1 wt.SE_v2 RSE_v1 RSE_v2
1 1 1.0 0.8 0.10 0.03 0.100000000 0.0375
2 2 1.0 0.2 0.01 0.30 0.010000000 1.5000
3 3 1.3 0.8 0.20 0.01 0.153846154 0.0125
4 4 2.3 0.2 0.02 0.10 0.008695652 0.5000
5 5 1.0 0.8 0.30 0.40 0.300000000 0.5000
6 6 0.0 0.2 0.03 0.04 Inf 0.2000
For every VAR I want the user-defined function suppress_fn()
to return a 0 if RSE_VAR < 0.3
and wt.mean_VAR > 0.9
, and 1 else. Therefore I want it to return:
output_supress
id wt.mean_v1 wt.mean_v2 wt.SE_v1 wt.SE_v2 RSE_v1 RSE_v2 suppress_v1 suppress_v2
1 1 1.0 0.8 0.10 0.03 0.100000000 0.0375 0 1
2 2 1.0 0.2 0.01 0.30 0.010000000 1.5000 0 1
3 3 1.3 0.8 0.20 0.01 0.153846154 0.0125 0 1
4 4 2.3 0.2 0.02 0.10 0.008695652 0.5000 0 1
5 5 1.0 0.8 0.30 0.40 0.300000000 0.5000 1 1
6 6 0.0 0.2 0.03 0.04 Inf 0.2000 1 1`
which I have created using the following statement, but I want to do it for all variables in VAR without specifying same suppress_VAR function the same number of times as the dimension of VAR. Can anyone show me how to do it using dplyr:: pivot_longer
followed by pivot_wider
? Other methods welcome too.
output_supress = input_ds_wt %>%
mutate(suppress_v1 = if_else(RSE_v1 < 0.3 & wt.mean_v1 > 0.9,0, 1 ),
suppress_v2 = if_else(RSE_v2 < 0.3 & wt.mean_v2 > 0.9,0, 1 ) )
output_supress
id wt.mean_v1 wt.mean_v2 wt.SE_v1 wt.SE_v2 RSE_v1 RSE_v2 suppress_v1 suppress_v2
1 1 1.0 0.8 0.10 0.03 0.100000000 0.0375 0 1
2 2 1.0 0.2 0.01 0.30 0.010000000 1.5000 0 1
3 3 1.3 0.8 0.20 0.01 0.153846154 0.0125 0 1
4 4 2.3 0.2 0.02 0.10 0.008695652 0.5000 0 1
5 5 1.0 0.8 0.30 0.40 0.300000000 0.5000 1 1
6 6 0.0 0.2 0.03 0.04 Inf 0.2000 1 1
I have a dataset, input_ds_wt
that contains values for RSE_VAR
and wt.mean_VAR
for many variables, VAR.
input_ds_wt = structure(list(id = c(1, 2, 3, 4, 5, 6), wt.mean_v1 = c(1, 1,
1.3, 2.3, 1, 0), wt.mean_v2 = c(0.8, 0.2, 0.8, 0.2, 0.8, 0.2),
wt.SE_v1 = c(0.1, 0.01, 0.2, 0.02, 0.3, 0.03), wt.SE_v2 = c(0.03,
0.3, 0.01, 0.1, 0.4, 0.04), RSE_v1 = c(0.1, 0.01, 0.153846153846154,
0.00869565217391304, 0.3, Inf), RSE_v2 = c(0.0375, 1.5, 0.0125,
0.5, 0.5, 0.2)), class = "data.frame", row.names = c(NA,
-6L))
gives
input_ds_wt
id wt.mean_v1 wt.mean_v2 wt.SE_v1 wt.SE_v2 RSE_v1 RSE_v2
1 1 1.0 0.8 0.10 0.03 0.100000000 0.0375
2 2 1.0 0.2 0.01 0.30 0.010000000 1.5000
3 3 1.3 0.8 0.20 0.01 0.153846154 0.0125
4 4 2.3 0.2 0.02 0.10 0.008695652 0.5000
5 5 1.0 0.8 0.30 0.40 0.300000000 0.5000
6 6 0.0 0.2 0.03 0.04 Inf 0.2000
For every VAR I want the user-defined function suppress_fn()
to return a 0 if RSE_VAR < 0.3
and wt.mean_VAR > 0.9
, and 1 else. Therefore I want it to return:
output_supress
id wt.mean_v1 wt.mean_v2 wt.SE_v1 wt.SE_v2 RSE_v1 RSE_v2 suppress_v1 suppress_v2
1 1 1.0 0.8 0.10 0.03 0.100000000 0.0375 0 1
2 2 1.0 0.2 0.01 0.30 0.010000000 1.5000 0 1
3 3 1.3 0.8 0.20 0.01 0.153846154 0.0125 0 1
4 4 2.3 0.2 0.02 0.10 0.008695652 0.5000 0 1
5 5 1.0 0.8 0.30 0.40 0.300000000 0.5000 1 1
6 6 0.0 0.2 0.03 0.04 Inf 0.2000 1 1`
which I have created using the following statement, but I want to do it for all variables in VAR without specifying same suppress_VAR function the same number of times as the dimension of VAR. Can anyone show me how to do it using dplyr:: pivot_longer
followed by pivot_wider
? Other methods welcome too.
output_supress = input_ds_wt %>%
mutate(suppress_v1 = if_else(RSE_v1 < 0.3 & wt.mean_v1 > 0.9,0, 1 ),
suppress_v2 = if_else(RSE_v2 < 0.3 & wt.mean_v2 > 0.9,0, 1 ) )
output_supress
id wt.mean_v1 wt.mean_v2 wt.SE_v1 wt.SE_v2 RSE_v1 RSE_v2 suppress_v1 suppress_v2
1 1 1.0 0.8 0.10 0.03 0.100000000 0.0375 0 1
2 2 1.0 0.2 0.01 0.30 0.010000000 1.5000 0 1
3 3 1.3 0.8 0.20 0.01 0.153846154 0.0125 0 1
4 4 2.3 0.2 0.02 0.10 0.008695652 0.5000 0 1
5 5 1.0 0.8 0.30 0.40 0.300000000 0.5000 1 1
6 6 0.0 0.2 0.03 0.04 Inf 0.2000 1 1
Using data.table, we will need to reshape couple of times, this could be optimised, but the idea is the same:
library(data.table)
setDT(input_ds_wt)
#1.reshape to get version
x <- melt(input_ds_wt, id.vars = "id")
x[, c("variable", "version") := tstrsplit(variable, split = "_") ]
#2.reshape to spread variables per version
x <- dcast(x, id + version ~ variable, value.var = "value")
#3.calculate suppress
x[, suppress := fifelse(RSE < 0.3 & wt.mean > 0.9, 0, 1) ]
#4.reshape to add versions to variable names
x <- melt(x, id.vars = c("id", "version") )
x[, variable := paste(variable, version, sep = "_") ]
#5.reshape to get original data structure
x <- dcast(x, id ~ variable, value.var = "value")
#x
# Key: <id>
# id RSE_v1 RSE_v2 suppress_v1 suppress_v2 wt.SE_v1 wt.SE_v2 wt.mean_v1 wt.mean_v2
# <num> <num> <num> <num> <num> <num> <num> <num> <num>
# 1: 1 0.100000000 0.0375 0 1 0.10 0.03 1.0 0.8
# 2: 2 0.010000000 1.5000 0 1 0.01 0.30 1.0 0.2
# 3: 3 0.153846154 0.0125 0 1 0.20 0.01 1.3 0.8
# 4: 4 0.008695652 0.5000 0 1 0.02 0.10 2.3 0.2
# 5: 5 0.300000000 0.5000 1 1 0.30 0.40 1.0 0.8
# 6: 6 Inf 0.2000 1 1 0.03 0.04 0.0 0.2
Not sure if this is what you're looking for, but the function below takes a vector of quoted variable names and data and then creates the suppressed variables and adds them back into the data. I do this by evaluating both conditions and then identifying places where they both do not hold. Then I turn the logicals into numbers.
input_ds_wt = structure(list(id = c(1, 2, 3, 4, 5, 6), wt.mean_v1 = c(1, 1,
1.3, 2.3, 1, 0), wt.mean_v2 = c(0.8, 0.2, 0.8, 0.2, 0.8, 0.2),
wt.SE_v1 = c(0.1, 0.01, 0.2, 0.02, 0.3, 0.03), wt.SE_v2 = c(0.03,
0.3, 0.01, 0.1, 0.4, 0.04), RSE_v1 = c(0.1, 0.01, 0.153846153846154,
0.00869565217391304, 0.3, Inf), RSE_v2 = c(0.0375, 1.5, 0.0125,
0.5, 0.5, 0.2)), class = "data.frame", row.names = c(NA,
-6L))
vars <- c("v1", "v2")
suppress <- function(vars, data, ...){
cond1 <- data[,paste0("RSE_", vars)] < .3
cond2 <- data[,paste0("wt.mean_", vars)] > .9
out <- apply(!(cond1 & cond2), 2, as.numeric)
colnames(out) <- paste0("suppress_", vars)
cbind(data, out)
}
suppress(c("v1", "v2"), input_ds_wt)
#> id wt.mean_v1 wt.mean_v2 wt.SE_v1 wt.SE_v2 RSE_v1 RSE_v2 suppress_v1
#> 1 1 1.0 0.8 0.10 0.03 0.100000000 0.0375 0
#> 2 2 1.0 0.2 0.01 0.30 0.010000000 1.5000 0
#> 3 3 1.3 0.8 0.20 0.01 0.153846154 0.0125 0
#> 4 4 2.3 0.2 0.02 0.10 0.008695652 0.5000 0
#> 5 5 1.0 0.8 0.30 0.40 0.300000000 0.5000 1
#> 6 6 0.0 0.2 0.03 0.04 Inf 0.2000 1
#> suppress_v2
#> 1 1
#> 2 1
#> 3 1
#> 4 1
#> 5 1
#> 6 1
Created on 2025-02-04 with reprex v2.1.1
Probably you can try this
input_ds_wt %>%
cbind({
.
} %>%
select(!id & contains(c("mean", "RSE"))) %>%
split.default(sub(".*_", "suppress_", names(.))) %>%
map_dfc(~ .x %>%
relocate(starts_with("wt")) %>%
{
+!(.x[[1]] > 0.9 & .x[[2]] < 0.3)
}))
which gives
id wt.mean_v1 wt.mean_v2 wt.SE_v1 wt.SE_v2 RSE_v1 RSE_v2 suppress_v1
1 1 1.0 0.8 0.10 0.03 0.100000000 0.0375 0
2 2 1.0 0.2 0.01 0.30 0.010000000 1.5000 0
3 3 1.3 0.8 0.20 0.01 0.153846154 0.0125 0
4 4 2.3 0.2 0.02 0.10 0.008695652 0.5000 0
5 5 1.0 0.8 0.30 0.40 0.300000000 0.5000 1
6 6 0.0 0.2 0.03 0.04 Inf 0.2000 1
suppress_v2
1 1
2 1
3 1
4 1
5 1
6 1
You could write a small comparison function and then mapply
over the column pairs. grep
ing using value=TRUE
with subsequent sort
ing for more safety.
> fn <- \(i, j, data=input_ds_wt) {c(1, 0)[1 + (data[, i] < 0.3 & data[, j] > 0.9)]}
> nm <- names(input_ds_wt)
> app <- mapply(fn, sort(grep('RSE', nm, value=TRUE)), sort(grep('mean', nm, value=TRUE)))
> input_ds_wt |> cbind(`colnames<-`(app, paste0('supress_v', seq_len(ncol(app)))))
id wt.mean_v1 wt.mean_v2 wt.SE_v1 wt.SE_v2 RSE_v1 RSE_v2 supress_v1 supress_v2
1 1 1.0 0.8 0.10 0.03 0.100000000 0.0375 0 1
2 2 1.0 0.2 0.01 0.30 0.010000000 1.5000 0 1
3 3 1.3 0.8 0.20 0.01 0.153846154 0.0125 0 1
4 4 2.3 0.2 0.02 0.10 0.008695652 0.5000 0 1
5 5 1.0 0.8 0.30 0.40 0.300000000 0.5000 1 1
6 6 0.0 0.2 0.03 0.04 Inf 0.2000 1 1