string - Adding leading zeros in the character and numeric mixed column names in R - Stack Overflow

admin2025-04-21  2

I have a dataframe with some column names like; "Sample_ID", "Time00", "X7236Nr1", "Y844Nr1856", "X9834Nr21", "S844Nr567"

I want to add leading zeros to the digits after "Nr", so that I can convert it all to 4 digit numbers; "Sample_ID", "Time00", "X7236Nr0001", "Y844Nr1856", "X9834Nr0021", "S844Nr0567"

I tried to use rename_at to select the columns and apply the appropriate function such as sprintf,

df %>% rename_at(vars(starts_with("[A-B][0-9]")), ~ FUNCTION)

but could not build correct function. Can you please advise any way to deal with that kind of mixed strings?

Thanks in advance

I have a dataframe with some column names like; "Sample_ID", "Time00", "X7236Nr1", "Y844Nr1856", "X9834Nr21", "S844Nr567"

I want to add leading zeros to the digits after "Nr", so that I can convert it all to 4 digit numbers; "Sample_ID", "Time00", "X7236Nr0001", "Y844Nr1856", "X9834Nr0021", "S844Nr0567"

I tried to use rename_at to select the columns and apply the appropriate function such as sprintf,

df %>% rename_at(vars(starts_with("[A-B][0-9]")), ~ FUNCTION)

but could not build correct function. Can you please advise any way to deal with that kind of mixed strings?

Thanks in advance

Share Improve this question edited Jan 23 at 10:29 Wiktor Stribiżew 628k41 gold badges498 silver badges616 bronze badges asked Jan 22 at 23:07 eraysahineraysahin 1196 bronze badges
Add a comment  | 

4 Answers 4

Reset to default 4

You can try

  • Option 1
# Only names that end in "Nr" followed by digits are rewritten
idx <- grepl("Nr\\d+$", s)
x <- s[idx]
# Split each selected name into its text stem and its trailing number
stem <- sub("\\d+$", "", x)
number <- as.integer(sub(".*Nr", "", x))
# "%.4i" prints the integer with at least four digits, zero-filled
s[idx] <- paste0(stem, sprintf("%.4i", number))
  • Option 2
# Split each name right after "Nr" (only when a digit follows) and left-pad
# the numeric tail with zeros to four characters.
# vapply() guarantees a character vector with one string per name; sapply()
# would fall back to a list if any name split into an unexpected number of
# pieces.
vapply(
  strsplit(s, "(?<=Nr)(?=\\d)", perl = TRUE),
  \(parts) {
    if (length(parts) == 2) {
      zeros <- strrep("0", max(0, 4 - nchar(parts[2])))
      paste0(parts[1], zeros, parts[2])
    } else {
      # No split point (or several): hand the name back unchanged as a
      # single string
      paste0(parts, collapse = "")
    }
  },
  character(1)
)
  • Option 3
library(gsubfn)
# Two capture groups: (1) everything up to and including "Nr", (2) the
# trailing digits. The groups are handed to the replacement function in that
# order.
gsubfn(
  "(.*Nr)(\\d+)$",
  function(prefix, digits) paste0(prefix, sprintf("%.4i", as.integer(digits))),
  s
)

or

# Pattern and replacement formula are separate arguments; the captured digits
# reach the formula as `x` and are re-emitted after a literal "Nr".
gsubfn("Nr(\\d+)$", ~ sprintf("Nr%.4i", as.integer(x)), s)

which gives

> s
[1] "Sample_ID"   "Time00"      "X7236Nr0001" "Y844Nr1856"  "X9834Nr0021"
[6] "S844Nr0567"

Data

# Example column names: two without an "Nr" part, four with one
s <- c(
  "Sample_ID", "Time00",
  "X7236Nr1", "Y844Nr1856", "X9834Nr21", "S844Nr567"
)

Use str_replace_all (from stringr) as shown below:

# Match only the digits directly preceded by "Nr" and zero-pad them to width 4
str_replace_all(
  names(df),
  "(?<=Nr)\\d+",
  \(digits) sprintf("%04d", as.numeric(digits))
)

[1] "Sample_ID"   "Time00"      "X7236Nr0001" "Y844Nr1856"  "X9834Nr0021"
[6] "S844Nr0567" 

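The regex is even simpler if you can use rename_with, but note that a bare \\d+ pads every run of digits in the selected names, including the one before "Nr" (Y844 becomes Y0844 and S844 becomes S0844 in the output below); keep "(?<=Nr)\\d+" if only the number after "Nr" should change: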

# Only columns whose name contains "Nr<digits>" are renamed. Within those
# names the bare "\\d+" pattern matches every run of digits, so each run is
# zero-padded to width 4, not just the one after "Nr".
df %>%
  rename_with(
    \(nm) str_replace_all(nm, "\\d+", \(d) sprintf("%04d", as.numeric(d))),
    matches("Nr\\d+")
  )

Sample_ID Time00 X7236Nr0001 Y0844Nr1856 X9834Nr0021 S0844Nr0567
1         1      A           1           4           7          10
2         2      B           2           5           8          11
3         3      C           3           6           9          12

In base R, this will directly change the names of the df:

# Locate the digit run that directly follows "Nr" in each column name
m <- regexpr("(?<=Nr)\\d+", names(df), perl = TRUE)
# Zero-pad the matched digits to width 4, then write them back into the names
padded <- sprintf("%04d", as.numeric(regmatches(names(df), m)))
regmatches(names(df), m) <- padded

df
  Sample_ID Time00 X7236Nr0001 Y844Nr1856 X9834Nr0021 S844Nr0567
1         1      A           1          4           7         10
2         2      B           2          5           8         11
3         3      C           3          6           9         12

In one row

# your data.frame
df <- data.frame(Sample_ID = 1, Time00 = 1, X7236Nr1 = 1, Y844Nr1856 = 1, X9834Nr21 = 1, S844Nr567 = 1)
# Base R only, as a single expression: rename the columns in place instead of
# rebuilding the data frame column by column. Only the digits directly after
# "Nr" are touched, so names without "Nr<digits>" and any text around the
# number are left exactly as they were.
names(df) <- vapply(
  names(df),
  function(x) {
    m <- regexpr("(?<=Nr)\\d+", x, perl = TRUE)
    if (m == -1L) {
      return(x)
    }
    # Zero-pad the matched digits to width 4 and splice them back in
    regmatches(x, m) <- sprintf("%04d", as.numeric(regmatches(x, m)))
    x
  },
  character(1),
  USE.NAMES = FALSE
)

Accepted Answer

I have a dataframe with some column names like; "Sample_ID", "Time00", "X7236Nr1", "Y844Nr1856", "X9834Nr21", "S844Nr567"

You can do it by using str_replace_all with a replacement function that uses str_match to find the "Nr" + number and str_pad() to pad the number to 4 digits with zeroes.

library(dplyr)
library(stringr)

# your data.frame
df <- data.frame(Sample_ID = 1, Time00 = 1, X7236Nr1 = 1, Y844Nr1856 = 1, X9834Nr21 = 1, S844Nr567 = 1)

# Zero-pad the number after "Nr" in every column name to 4 digits.
df <- df %>%
  rename_with(~ str_replace_all(., "Nr(\\d+)", function(x) {
    # x holds the matched "Nr<digits>" text. Depending on the stringr version
    # it is a single match or a vector of all matches, so take the whole
    # capture-group column rather than a single matrix cell.
    digits <- str_match(x, "Nr(\\d+)")[, 2]
    # Keep the input wherever no digits were captured; otherwise rebuild the
    # match with the padded number. The result has the same length as x.
    ifelse(is.na(digits), x, paste0("Nr", str_pad(digits, 4, pad = "0")))
  }))

### Result
> colnames(df)
"Sample_ID"   "Time00"      "X7236Nr0001" "Y844Nr1856"  "X9834Nr0021" "S844Nr0567" 

# Explanations
> str_match("Y844Nr0856", "Nr(\\d+)")
     [,1]     [,2]  
[1,] "Nr0856" "0856"
 
> str_match("Time00", "Nr(\\d+)") # has NA as match[,2], therefore we will not replace anything
     [,1] [,2]
[1,] NA   NA  
 
> str_pad("856", 4, pad = "0") # could also use sprintf()
[1] "0856"

Here is a way using sprintf

# Zero-pad the number that follows "Nr" in each element of `x`.
#
# x: character vector (e.g. column names).
# Returns a character vector of the same length; elements without
# "Nr<digits>" are returned unchanged. The padding width is the length of the
# longest number found in `x`, so all numbers end up equally wide.
f <- \(x) {
  # Locate the digit run directly after "Nr"; requiring digits keeps names
  # like "Nrabc" from being parsed as a number
  m <- regexpr('(?<=Nr)\\d+', x, perl = TRUE)
  nbs <- regmatches(x, m)
  # Nothing to pad: return early instead of calling max() on an empty vector
  if (length(nbs) == 0L) {
    return(x)
  }
  # Build the format from the widest number, e.g. '%04d'
  fmt <- paste0('%0', max(nchar(nbs)), 'd')
  regmatches(x, m) <- sprintf(fmt, as.integer(nbs))
  x
}
> f(x)
[1] "Sample_ID"   "Time00"      "X7236Nr0001" "Y844Nr1856"  "X9834Nr0021" "S844Nr0567" 
转载请注明原文地址:http://anycun.com/QandA/1745224210a90437.html