stringr

library(tidyverse)

Overview

stringr is built on top of stringi, which uses the ICU C library to provide fast, correct implementations of common string manipulations. stringr focusses on the most important and commonly used string manipulation functions whereas stringi provides a comprehensive set covering almost anything you can imagine. If you find that stringr is missing a function that you need, try looking in stringi. Both packages share similar conventions, so once you’ve mastered stringr, you should find stringi similarly easy to use.

generate data

fruit <- stringr::fruit[1:30]

sentences <- stringr::sentences[1:20]

foo <- c('   a   bc   d   ')

Regular Expressions

`str_view(string, pattern, match = NA)`

`str_view_all(string, pattern, match = NA)`

str_view(fruit[1:5], '[aeiou]')

str_view_all(fruit[1:5], '[aeiou]')

special character———`[ ]` `\` `^` `$` `.` `|` `?` `*` `+` `( )`

\ (learn more about escaped characters: ?'"')
```
quote <- c(" ' ", ' " ', ' \' ', " \" ")
writeLines(quote)
```
```
#>  ' 
#>  " 
#>  ' 
#>  "
```
```
str_view_all(c("ab\nc", "12d", "ae2"), "\n")
```
```
str_view_all(c("ab\nc", "12\bd", "ae2"), "\b")
```
```
str_view_all(c("ab\nc", "12\bd", "ae2"), "\\b")
```
```
str_view_all(c("ab\nc", "12\\d", "ae2"), "\\\\")
```
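\d matches any digit (\D matches anything that is not a digit)
\s matches any whitespace, e.g. space, tab, newline (\S matches any non-whitespace character)
\w matches any word character, which includes alphabetic characters, marks and decimal numbers (\W matches any non-word character)
\b matches word boundaries, the transition between word and non-word characters (\B matches any position that is not a word boundary)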
```
str_view_all(c("abc", "12d", "ae2"), "\\d")
```
```
str_view_all(c("abc", "\\12d", "1e2"), "\\w")
```
```
str_view_all(c("a c", "\\1\td", "1e2"), "\\s")
```
```
str_view_all(c("a c", "\\1\td", "1e2"), "\\b")
```
| [] ()
```
str_view(c("longest", "lonlest", "lonaest"), "lon(g|l)est")
```
```
str_view(c("longest", "lonlest", "lonaest"), "lon[gal]est")
```
```
str_view(c("longest", "lonlest", "lonaest"), "lon[a-g]est")
```
```
str_view(c("longest", "lonlest", "lonaest"), "lon[^a-g]est")
```
. matches any character (except a newline)
```
x <- c("apple", "ba\nnana", "pear")
writeLines(x)
```
```
#> apple
#> ba
#> nana
#> pear
```
```
str_view_all(x, ".a.")
```
```
str_view_all(x, ".a\\b")
```
```
str_view_all(x, ".a\\n")
```
^/$ match the start/end of the string
```
str_view(c("a\\abpl^e", "bbna\bna", "pear", "aaa"), "^a")
```
```
str_view(c("apple", "banana", "pear"), "a$")
```
```
str_view(c("$^$"), "\\$\\^\\$")
```
Repetition

?: 0 or 1

+: 1 or more

*: 0 or more

{n}: exactly n / {n,}: n or more / {n,m}: between n and m
```
str_view("1888 is the longest year in Roman numerals: MDCCCLXXXVIII", "CC?")
```
```
str_view("1888 is the longest year in Roman numerals: MDCCCCLXXXVIII", "CC+")
```
```
str_view("1888 is the longest year in Roman numerals: MDCCCLXXXVIII", "CC*")
```
```
str_view("1888 is the longest year in Roman numerals: MDCCCLXXXVIII", "C{2,3}")
```
Grouping and backreferences
```
str_view(fruit[16:25], "(..)\\1")
```
```
str_view(c("bacdb","bacdbacd"), "^(.).*\\1$")
```

`regex(pattern, ignore_case = FALSE, multiline = FALSE, comments = FALSE, dotall = FALSE)`

ignore_case: Should case differences be ignored in the match?
multiline: If TRUE, “^” and “$” match the beginning and end of each line. If FALSE, the default, they only match the start and end of the input.
comments: If TRUE, white space and comments beginning with “#” are ignored. Escape literal spaces with “\”.
dotall: If TRUE, “.” will also match line terminators.

x <- c("apple", "ba\nnana", "pear")

str_view_all(x, regex(".A.", ignore_case=T, dotall = T))

str_view_all("A\nb", regex("^b", multiline=T))

Detect Matches

str_detect(string, pattern, negate = FALSE): Detect the presence or absence of a pattern in a string.
str_which(string, pattern, negate = FALSE): Find the positions (indices) of the strings that match a pattern.
str_count(string, pattern = ""): Count the number of matches in a string.
str_locate(string, pattern): Locate the first match in each string; returns an integer matrix.
str_locate_all(string, pattern): Locate all matches in each string; returns a list of integer matrices.

fruit
#>  [1] "apple"        "apricot"      "avocado"      "banana"       "bell pepper" 
#>  [6] "bilberry"     "blackberry"   "blackcurrant" "blood orange" "blueberry"   
#> [11] "boysenberry"  "breadfruit"   "canary melon" "cantaloupe"   "cherimoya"   
#> [16] "cherry"       "chili pepper" "clementine"   "cloudberry"   "coconut"     
#> [21] "cranberry"    "cucumber"     "currant"      "damson"       "date"        
#> [26] "dragonfruit"  "durian"       "eggplant"     "elderberry"   "feijoa"

`str_detect(string, pattern, negate = FALSE)`

str_detect(fruit[1:5], 'a', negate = T)
#> [1] FALSE FALSE FALSE FALSE  TRUE

test <- matrix(fruit[1:10],nrow=5)
str_detect(test,'a')
#>  [1]  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE FALSE

str_detect('appleapricot', fruit[1:5])
#> [1]  TRUE  TRUE FALSE FALSE FALSE

str_detect(fruit[1:5], c('a', 'b', 'c', 'd', 'e'))
#> [1]  TRUE FALSE  TRUE FALSE  TRUE

`str_which(string, pattern, negate = FALSE)`

str_which(fruit[1:5], 'a')
#> [1] 1 2 3 4

str_which(fruit, 'a', negate = T)
#>  [1]  5  6 10 11 16 17 18 19 20 22 29

`str_count(string, pattern = "")`

str_count(fruit[1:5], 'a')
#> [1] 1 1 2 3 0

str_count('abababa', 'aba')
#> [1] 2

`str_locate(string, pattern)`

`str_locate_all(string, pattern)`

str_locate(fruit[1:5], 'a')
#>      start end
#> [1,]     1   1
#> [2,]     1   1
#> [3,]     1   1
#> [4,]     2   2
#> [5,]    NA  NA

str_locate_all(fruit[1:5], 'a')
#> [[1]]
#>      start end
#> [1,]     1   1
#> 
#> [[2]]
#>      start end
#> [1,]     1   1
#> 
#> [[3]]
#>      start end
#> [1,]     1   1
#> [2,]     5   5
#> 
#> [[4]]
#>      start end
#> [1,]     2   2
#> [2,]     4   4
#> [3,]     6   6
#> 
#> [[5]]
#>      start end

Subset Strings

str_sub(string, start = 1L, end = -1L) Extract and replace substrings from a character vector.
str_subset(string, pattern, negate = FALSE) Keep strings matching a pattern.
str_extract(string, pattern) Extract the first match of a pattern from each string.
str_extract_all(string, pattern, simplify = FALSE) Extract all matches of a pattern from each string.

fruit
#>  [1] "apple"        "apricot"      "avocado"      "banana"       "bell pepper" 
#>  [6] "bilberry"     "blackberry"   "blackcurrant" "blood orange" "blueberry"   
#> [11] "boysenberry"  "breadfruit"   "canary melon" "cantaloupe"   "cherimoya"   
#> [16] "cherry"       "chili pepper" "clementine"   "cloudberry"   "coconut"     
#> [21] "cranberry"    "cucumber"     "currant"      "damson"       "date"        
#> [26] "dragonfruit"  "durian"       "eggplant"     "elderberry"   "feijoa"

`str_sub(string, start = 1L, end = -1L)`

str_sub(fruit[1:5], 1, 3)
#> [1] "app" "apr" "avo" "ban" "bel"

str_sub(fruit[1:5], end = -2)
#> [1] "appl"       "aprico"     "avocad"     "banan"      "bell peppe"

str_sub(fruit[1:5], -2)
#> [1] "le" "ot" "do" "na" "er"

str_sub(fruit[1:5], c(1, 2, 3, 1, 1), c(3, 3, 4, 3, 3))
#> [1] "app" "pr"  "oc"  "ban" "bel"

# str_locate(fruit[1:5], '(..)\\1')
# str_sub(fruit[1:5], str_locate(fruit[1:5], '(..)\\1'))

`str_subset(string, pattern, negate = FALSE)`

str_subset(fruit, 'b')
#>  [1] "banana"       "bell pepper"  "bilberry"     "blackberry"   "blackcurrant"
#>  [6] "blood orange" "blueberry"    "boysenberry"  "breadfruit"   "cloudberry"  
#> [11] "cranberry"    "cucumber"     "elderberry"

# fruit[str_detect(fruit,'b')]

`str_extract(string, pattern)`

`str_extract_all(string, pattern, simplify = FALSE)`

str_extract(c('aeaebcbc','bcaebc','abcccc'),'(..)\\1')
#> [1] "aeae" NA     "cccc"
str_extract(sentences, '\\w+(es|s)\\b')
#>  [1] "planks"    NA          NA          "days"      "is"        "lemons"   
#>  [7] "was"       "hogs"      "hours"     "stockings" "was"       "is"       
#> [13] "is"        NA          NA          "helps"     "fires"     NA         
#> [19] "across"    "bonds"

str_extract_all(c('aeaebcbc','bcaebc','abcccc'),'(..)\\1')
#> [[1]]
#> [1] "aeae" "bcbc"
#> 
#> [[2]]
#> character(0)
#> 
#> [[3]]
#> [1] "cccc"

str_extract_all(c('aeaebcbc','bcaebc','abcccc'),'(..)\\1', simplify = T)
#>      [,1]   [,2]  
#> [1,] "aeae" "bcbc"
#> [2,] ""     ""    
#> [3,] "cccc" ""

`str_match(string, pattern)` Extract matched groups from a string.

`str_match_all(string, pattern)`

sentences[1:10]
#>  [1] "The birch canoe slid on the smooth planks." 
#>  [2] "Glue the sheet to the dark blue background."
#>  [3] "It's easy to tell the depth of a well."     
#>  [4] "These days a chicken leg is a rare dish."   
#>  [5] "Rice is often served in round bowls."       
#>  [6] "The juice of lemons makes fine punch."      
#>  [7] "The box was thrown beside the parked truck."
#>  [8] "The hogs were fed chopped corn and garbage."
#>  [9] "Four hours of steady work faced us."        
#> [10] "Large size in stockings is hard to sell."

str_match(sentences[1:10], '(a|the) ([^ ]+)')
#>       [,1]         [,2]  [,3]     
#>  [1,] "the smooth" "the" "smooth" 
#>  [2,] "the sheet"  "the" "sheet"  
#>  [3,] "the depth"  "the" "depth"  
#>  [4,] "a chicken"  "a"   "chicken"
#>  [5,] NA           NA    NA       
#>  [6,] NA           NA    NA       
#>  [7,] "the parked" "the" "parked" 
#>  [8,] NA           NA    NA       
#>  [9,] NA           NA    NA       
#> [10,] NA           NA    NA

# str_extract(sentences[1:10], '(a|the) ([^ ]+)')
# str_match(sentences[1:10], '(a|the) ([^ ]+)')[,1]

str_match_all(sentences[1:5], '(a|the) ([^ ]+)')
#> [[1]]
#>      [,1]         [,2]  [,3]    
#> [1,] "the smooth" "the" "smooth"
#> 
#> [[2]]
#>      [,1]        [,2]  [,3]   
#> [1,] "the sheet" "the" "sheet"
#> [2,] "the dark"  "the" "dark" 
#> 
#> [[3]]
#>      [,1]        [,2]  [,3]   
#> [1,] "the depth" "the" "depth"
#> [2,] "a well."   "a"   "well."
#> 
#> [[4]]
#>      [,1]        [,2] [,3]     
#> [1,] "a chicken" "a"  "chicken"
#> [2,] "a rare"    "a"  "rare"   
#> 
#> [[5]]
#>      [,1] [,2] [,3]

Mutate Strings

str_sub(string, start = 1L, end = -1L, omit_na=FALSE) <- value
str_replace(string, pattern, replacement)
str_replace_all(string, pattern, replacement)
str_remove(string, pattern)
str_remove_all(string, pattern)
str_to_upper(string, locale = "en")
str_to_lower(string, locale = "en")
str_to_title(string, locale = "en")

fruit
#>  [1] "apple"        "apricot"      "avocado"      "banana"       "bell pepper" 
#>  [6] "bilberry"     "blackberry"   "blackcurrant" "blood orange" "blueberry"   
#> [11] "boysenberry"  "breadfruit"   "canary melon" "cantaloupe"   "cherimoya"   
#> [16] "cherry"       "chili pepper" "clementine"   "cloudberry"   "coconut"     
#> [21] "cranberry"    "cucumber"     "currant"      "damson"       "date"        
#> [26] "dragonfruit"  "durian"       "eggplant"     "elderberry"   "feijoa"
fruit_temp <- fruit

`str_sub(string, start = 1L, end = -1L, omit_na=FALSE) <- value`

str_sub(fruit_temp,1,3) <- 'str'
fruit_temp
#>  [1] "strle"        "stricot"      "strcado"      "strana"       "strl pepper" 
#>  [6] "strberry"     "strckberry"   "strckcurrant" "strod orange" "streberry"   
#> [11] "strsenberry"  "stradfruit"   "strary melon" "strtaloupe"   "strrimoya"   
#> [16] "strrry"       "strli pepper" "strmentine"   "strudberry"   "stronut"     
#> [21] "strnberry"    "strumber"     "strrant"      "strson"       "stre"        
#> [26] "strgonfruit"  "strian"       "strplant"     "strerberry"   "strjoa"

`str_replace(string, pattern, replacement)`

`str_replace_all(string, pattern, replacement)`

str_replace(fruit[1:5], 'a', '-')
#> [1] "-pple"       "-pricot"     "-vocado"     "b-nana"      "bell pepper"

str_replace_all(fruit[1:5], 'a', '-')
#> [1] "-pple"       "-pricot"     "-voc-do"     "b-n-n-"      "bell pepper"

`str_remove(string, pattern)`

`str_remove_all(string, pattern)`

example <- c("apple", "apap", "ap")
str_remove(example, 'ap') 
#> [1] "ple" "ap"  ""

str_remove_all(example, 'ap')
#> [1] "ple" ""    ""

str_remove_all(example, '[ap]')
#> [1] "le" ""   ""

`str_to_upper(string, locale = "en")`

`str_to_lower(string, locale = "en")`

`str_to_title(string, locale = "en")`

sentences[1:5]
#> [1] "The birch canoe slid on the smooth planks." 
#> [2] "Glue the sheet to the dark blue background."
#> [3] "It's easy to tell the depth of a well."     
#> [4] "These days a chicken leg is a rare dish."   
#> [5] "Rice is often served in round bowls."

str_to_upper(sentences[1:5])
#> [1] "THE BIRCH CANOE SLID ON THE SMOOTH PLANKS." 
#> [2] "GLUE THE SHEET TO THE DARK BLUE BACKGROUND."
#> [3] "IT'S EASY TO TELL THE DEPTH OF A WELL."     
#> [4] "THESE DAYS A CHICKEN LEG IS A RARE DISH."   
#> [5] "RICE IS OFTEN SERVED IN ROUND BOWLS."

str_to_lower(sentences[1:5])
#> [1] "the birch canoe slid on the smooth planks." 
#> [2] "glue the sheet to the dark blue background."
#> [3] "it's easy to tell the depth of a well."     
#> [4] "these days a chicken leg is a rare dish."   
#> [5] "rice is often served in round bowls."

str_to_title(sentences[1:5])
#> [1] "The Birch Canoe Slid On The Smooth Planks." 
#> [2] "Glue The Sheet To The Dark Blue Background."
#> [3] "It's Easy To Tell The Depth Of A Well."     
#> [4] "These Days A Chicken Leg Is A Rare Dish."   
#> [5] "Rice Is Often Served In Round Bowls."

Join and Split

Join

str_c(..., sep = "", collapse = NULL)
str_dup(string, times)
str_glue(...)
str_glue_data(.x, ..., .na = "NA")

Split

str_split(string, pattern, n = Inf, simplify = FALSE)
str_split_fixed(string, pattern, n)

str_c(..., sep = "", collapse = NULL):

letters[1:5]
#> [1] "a" "b" "c" "d" "e"

LETTERS[1:5]
#> [1] "A" "B" "C" "D" "E"

# for one part
str_c(letters[1:5], collapse =' ')
#> [1] "a b c d e"

# for two or more parts
str_c(letters[1:5], LETTERS[1:5], sep = '/')
#> [1] "a/A" "b/B" "c/C" "d/D" "e/E"

str_c(letters[1:5], LETTERS[1:5], sep = '/', collapse = ' ')
#> [1] "a/A b/B c/C d/D e/E"

str_dup(string, times):

str_dup(fruit[1:6], times = 2)
#> [1] "appleapple"             "apricotapricot"         "avocadoavocado"        
#> [4] "bananabanana"           "bell pepperbell pepper" "bilberrybilberry"

str_dup(fruit[1:6], 1:3)
#> [1] "apple"                    "apricotapricot"          
#> [3] "avocadoavocadoavocado"    "banana"                  
#> [5] "bell pepperbell pepper"   "bilberrybilberrybilberry"

`str_glue(...)`

`str_glue_data(.x, ..., .na = "NA")`

name <- 'Bob'
age <- 50
anniversary <- as.Date("1991-10-12")
str_glue(
  "My name is {name}, ",
  "my age next year is {age + 1}, ",
  "and my anniversary is {format(anniversary, '%A, %B %d, %Y')}."
)
#> My name is Bob, my age next year is 51, and my anniversary is Saturday, October 12, 1991.

str_glue(
  "My name is {name}, ",
  "and my age next year is {age + 1}.",
  name = "Joe",
  age = 40
)
#> My name is Joe, and my age next year is 41.

str_glue('the {i}th letter is {letters[1:5]}', i = 1:5)
#> the 1th letter is a
#> the 2th letter is b
#> the 3th letter is c
#> the 4th letter is d
#> the 5th letter is e

# here str_glue() is more readable than str_c()
str_c('the ', 1:5, 'th letter is ', letters[1:5])
#> [1] "the 1th letter is a" "the 2th letter is b" "the 3th letter is c"
#> [4] "the 4th letter is d" "the 5th letter is e"

str_glue('the {i}th {{letter}} is {{{letters[1:5]}}', i=1:5)
#> the 1th {letter} is {a}
#> the 2th {letter} is {b}
#> the 3th {letter} is {c}
#> the 4th {letter} is {d}
#> the 5th {letter} is {e}

str_glue_data(mtcars[1:5, ], '{hp} hp')
#> 110 hp
#> 110 hp
#> 93 hp
#> 110 hp
#> 175 hp

mtcars[1:5, ] %>% str_glue_data('the {rownames(.)} has {hp} hp')
#> the Mazda RX4 has 110 hp
#> the Mazda RX4 Wag has 110 hp
#> the Datsun 710 has 93 hp
#> the Hornet 4 Drive has 110 hp
#> the Hornet Sportabout has 175 hp

`str_split(string, pattern, n = Inf, simplify = FALSE)`

`str_split_fixed(string, pattern, n)`

str_split(string, pattern, simplify = T) is equivalent to str_split_fixed(string, pattern, n = Inf)

fruits_split <- c(
  "apples and oranges and pears and bananas",
  "pineapples and mangos and guavas"
)

str_split(fruits_split, " and ")
#> [[1]]
#> [1] "apples"  "oranges" "pears"   "bananas"
#> 
#> [[2]]
#> [1] "pineapples" "mangos"     "guavas"

str_split(fruits_split, " and ", simplify = TRUE)
#>      [,1]         [,2]      [,3]     [,4]     
#> [1,] "apples"     "oranges" "pears"  "bananas"
#> [2,] "pineapples" "mangos"  "guavas" ""


# Specify n to restrict the number of possible matches
str_split(fruits_split, " and ", n = 3)
#> [[1]]
#> [1] "apples"            "oranges"           "pears and bananas"
#> 
#> [[2]]
#> [1] "pineapples" "mangos"     "guavas"

str_split(fruits_split, " and ", n = 2)
#> [[1]]
#> [1] "apples"                        "oranges and pears and bananas"
#> 
#> [[2]]
#> [1] "pineapples"        "mangos and guavas"

# If n greater than number of pieces, no padding occurs
str_split(fruits_split, " and ", n = 5)
#> [[1]]
#> [1] "apples"  "oranges" "pears"   "bananas"
#> 
#> [[2]]
#> [1] "pineapples" "mangos"     "guavas"


# Use str_split_fixed() to return a character matrix
str_split_fixed(fruits_split, " and ", 3)
#>      [,1]         [,2]      [,3]               
#> [1,] "apples"     "oranges" "pears and bananas"
#> [2,] "pineapples" "mangos"  "guavas"

str_split_fixed(fruits_split, " and ", 4)
#>      [,1]         [,2]      [,3]     [,4]     
#> [1,] "apples"     "oranges" "pears"  "bananas"
#> [2,] "pineapples" "mangos"  "guavas" ""

Manage Lengths

str_length(string) The length of a string (number of characters)
str_pad(string, width, side = c("left", "right", "both"), pad =" ") Pad a string
str_trunc(string, width, side = c("right", "left", "center"), ellipsis = "...") Truncate a character string.
str_trim(string, side = c("both", "left", "right")) Trim whitespace from a string
str_squish(string) Trim whitespace from both ends and collapse repeated internal whitespace into a single space

fruit
#>  [1] "apple"        "apricot"      "avocado"      "banana"       "bell pepper" 
#>  [6] "bilberry"     "blackberry"   "blackcurrant" "blood orange" "blueberry"   
#> [11] "boysenberry"  "breadfruit"   "canary melon" "cantaloupe"   "cherimoya"   
#> [16] "cherry"       "chili pepper" "clementine"   "cloudberry"   "coconut"     
#> [21] "cranberry"    "cucumber"     "currant"      "damson"       "date"        
#> [26] "dragonfruit"  "durian"       "eggplant"     "elderberry"   "feijoa"

`str_length(string)`

x <- c('adv', 'ss', 'awsd')
writeLines(x)
#> adv
#> ss
#> awsd

length(x)
#> [1] 3

str_length(x)
#> [1] 3 2 4

`str_pad(string, width, side = c("left", "right", "both"), pad =" ")` Pad a string

str_pad(c("a", "abc", "abcdef"), 10, side = 'right')
#> [1] "a         " "abc       " "abcdef    "

str_pad("a", c(5, 7, 10))
#> [1] "    a"      "      a"    "         a"

str_pad("a", 10, pad = c("-", "_", " "))
#> [1] "---------a" "_________a" "         a"

`str_trunc(string, width, side = c("right", "left", "center"), ellipsis = "...")` Truncate a character string.

str_trunc(fruit, 7)
#>  [1] "apple"   "apricot" "avocado" "banana"  "bell..." "bilb..." "blac..."
#>  [8] "blac..." "bloo..." "blue..." "boys..." "brea..." "cana..." "cant..."
#> [15] "cher..." "cherry"  "chil..." "clem..." "clou..." "coconut" "cran..."
#> [22] "cucu..." "currant" "damson"  "date"    "drag..." "durian"  "eggp..."
#> [29] "elde..." "feijoa"

str_trunc(fruit, 7, side = 'left', ellipsis = '**')
#>  [1] "apple"   "apricot" "avocado" "banana"  "**epper" "**berry" "**berry"
#>  [8] "**rrant" "**range" "**berry" "**berry" "**fruit" "**melon" "**loupe"
#> [15] "**imoya" "cherry"  "**epper" "**ntine" "**berry" "coconut" "**berry"
#> [22] "**umber" "currant" "damson"  "date"    "**fruit" "durian"  "**plant"
#> [29] "**berry" "feijoa"

`str_trim(string, side = c("both", "left", "right"))` Trim whitespace from a string

`str_squish(string)`

str_trim(c('    a', 'b     ', 'c   d'))
#> [1] "a"     "b"     "c   d"

str_trim(c('    a', 'b     ', 'c   d'), side = 'left')
#> [1] "a"      "b     " "c   d"

str_squish(c('    a', 'b     ', 'c   d'))
#> [1] "a"   "b"   "c d"

#str_replace_all(c('    a', 'b     ', 'c   d'), '\\s', '')

Order Strings

str_order(x, decreasing = FALSE, na_last = TRUE, locale = "en", numeric = FALSE, ...)
str_sort(x, decreasing = FALSE, na_last = TRUE, locale = "en", numeric = FALSE, ...)

bar <- c('b', 'c', 'a', 'd')

str_order(bar)
#> [1] 3 1 2 4

bar[str_order(bar)]
#> [1] "a" "b" "c" "d"

x <- c("100a10", "100a5", "2b", "2a")
str_sort(x)
#> [1] "100a10" "100a5"  "2a"     "2b"
str_sort(x, numeric = TRUE)
#> [1] "2a"     "2b"     "100a5"  "100a10"

Exercise

Use str_length() and str_sub() to extract the middle character from a string. What will you do if the string has an even number of characters?
Find all words in the `stringr::words` dataset that start with a vowel and end with a consonant.

Helpers

More details about regular expressions: CRAN | stringr/Regular expressions

Reference

cheatsheet - stringr

GitHub - tidyverse/stringr

14 Strings | R for Data Science

Simple, Consistent Wrappers for Common String Operations • stringr

stringr

Overview

generate data

Regular Expressions

str_view(string, pattern, match = NA)

str_view_all(string, pattern, match = NA)

special character———[ ] \ ^ $ . | ? * + ( )

regex(pattern, ignore_case = FALSE, multiline = FALSE, comments = FALSE, dotall = FALSE)

Detect Matches

str_detect(string, pattern, negate = FALSE)

str_which(string, pattern, negate = FALSE)

str_count(string, pattern = "")

str_locate(string, pattern)

str_locate_all(string, pattern)

Subset Strings

str_sub(string, start = 1L, end = -1L)

str_subset(string, pattern, negate = FALSE)

str_extract(string, pattern)

str_extract_all(string, pattern, simplify = FALSE)

str_match(string, pattern) Extract matched groups from a string.

str_match_all(string, pattern)

Mutate Strings

str_sub(string, start = 1L, end = -1L, omit_na=FALSE) <- value

str_replace(string, pattern, replacement)

str_replace_all(string, pattern, replacement)

str_remove(string, pattern)

str_remove_all(string, pattern)

str_to_upper(string, locale = "en")

str_to_lower(string, locale = "en")

str_to_title(string, locale = "en")