-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcgu1.R
More file actions
73 lines (48 loc) · 2.13 KB
/
cgu1.R
File metadata and controls
73 lines (48 loc) · 2.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#########################
# Loren Collingwood #
# UC Riverside #
# CGU Text Presentation #
# Text Manipulation #
# Date: 5/22/2019 #
#########################
library(stringr)
# Demo vector: a mix of words, digits and punctuation to match against.
shopping_list <- c(
  "apples x45!", "loaf of bread", "Bag of sugar", "milk x2 or x3"
)

# Extract the first digit in each string. Strings without any digit give NA
# (NA is how R denotes a missing value).
str_extract(shopping_list, "\\d")

# Extract the first lower case letter in each string.
str_extract(shopping_list, "[a-z]")

# Extract the first run of one or more lower case letters
# (the "+" after "[a-z]" means "one or more times").
str_extract(shopping_list, "[a-z]+")

# Extract the first run of lower case letters, capped at four letters
# ("{1,4}" means "between one and four times").
str_extract(shopping_list, "[a-z]{1,4}")

# Same, but for upper or lower case letters.
# Note: the class is spelled "[A-Za-z]" on purpose. The shorter "[A-z]" is a
# single ASCII range that also covers the six characters sitting between "Z"
# and "a" ("[", "\", "]", "^", "_" and the backtick), so it would match
# non-letters as well.
str_extract(shopping_list, "[A-Za-z]{1,4}")

# Extract the first whole lower case word of at most four letters
# ("\\b" anchors the match at a word boundary on each side).
str_extract(shopping_list, "\\b[a-z]{1,4}\\b")

# Extract all matches, not just the first one.
str_extract_all(shopping_list, "[A-Za-z]+")
str_extract_all(shopping_list, "\\d")

# Note that str_extract_all() returns a list with one character vector per
# input string. Passing the argument simplify = TRUE returns a character
# matrix instead (one row per input string).
str_extract_all(shopping_list, "\\b[a-z]+\\b", simplify = TRUE)
str_extract_all(shopping_list, "\\d", simplify = TRUE)

# Alternatively, flatten the list into one character vector of all matches.
unlist(str_extract_all(shopping_list, "\\b[a-z]+\\b"))

# Replace the first match (here: the first lower case vowel) in each string.
str_replace(shopping_list, "[aeiou]", "-")

# Replace all matches in each string.
str_replace_all(shopping_list, "[aeiou]", "-")

# Convert to upper case.
str_to_upper(shopping_list)

# Convert to lower case.
str_to_lower(shopping_list)
#In R, you write regular expressions as strings: sequences of characters surrounded by double quotes ("") or single
#quotes (''). Characters like +, ?, ^, and . have a special meaning in regular expressions, so writing one of them
#bare in a pattern does not match that character literally (see the RegEx cheat sheet for more examples). In order
#to match them literally, they need to be preceded by two backslashes in the R string, e.g. "\\." to match a dot.
# Three names containing the regex metacharacters "." and "+".
name_list <- c("Jo.hn", "Anna.", "Si.+si")

# Contrast 1: a bare "." is a wildcard for any character, so the first call
# replaces the first character of every name; the escaped "\\." in the second
# call replaces the first literal dot instead.
str_replace(string = name_list, pattern = ".", replacement = "-")
str_replace(string = name_list, pattern = "\\.", replacement = "-")

# Contrast 2: a bare ".+" means "any character, one or more times", so the
# first call replaces each whole name; the escaped "\\.\\+" in the second call
# only matches the literal two-character sequence ".+".
str_replace(string = name_list, pattern = ".+", replacement = "-")
str_replace(string = name_list, pattern = "\\.\\+", replacement = "-")