This is an old revision of the document!
Table of Contents
> pi [1] 3.141593 > sqrt(2) [1] 1.414214
When you enter expressions like that, R evaluates the expression and then implicitly calls the print function. So the previous example is identical to this:
> print(pi) [1] 3.141593 > print(sqrt(2)) [1] 1.414214
The print function has a significant limitation, however: it prints only one object at a time. Trying to print multiple items gives this mind-numbing error message:
> print("The zero occurs at", 2*pi, "radians.") Error in print.default("The zero occurs at", 2 * pi, "radians.") : unimplemented type 'character' in 'asLogical'
Instead, use cat, which accepts multiple items and prints them one after another:
> cat("The zero occurs at", 2*pi, "radians.", "\n") The zero occurs at 6.283185 radians.
Note: cat automatically inserts a space between items, but it does not append a line feed; you must supply "\n" yourself.
A simple vector
> fib <- c(0,1,1,2,3,5,8,13,21,34) > cat("The first few Fibonacci numbers are:", fib, "...\n") The first few Fibonacci numbers are: 0 1 1 2 3 5 8 13 21 34 ...
A serious limitation of cat, however, is that it cannot print compound data structures such as matrices and lists.
Variables
> variable_name <- 3
Variables do not need to be declared, and the same variable can later hold a value of a different type:
> x <- 3 > print(x) [1] 3 > x <- c("fee", "fie", "foe", "fum") > print(x) [1] "fee" "fie" "foe" "fum"
Listing Variables
> ls() character(0)
> x <- 10 > y <- 50 > z <- c("three", "blind", "mice") > f <- function(n,p) sqrt(p*(1-p)/n) > ls() [1] "f" "x" "y" "z"
> ls.str() f : function (n, p) x : num 10 y : num 50 z : chr [1:3] "three" "blind" "mice"
A variable whose name begins with a dot (“.”) is hidden: ls() does not list it unless all.names=TRUE is given.
> .hidvar <- 10 > ls() [1] "f" "x" "y" "z" > ls(all.names=TRUE) [1] ".hidvar" "f" "x" "y" "z"
Deleting Variables
> x <- 2*pi > x [1] 6.283185 > rm(x) > x Error: object "x" not found
Note: there is no “undo”; once a variable is removed, it is gone.
Wipe out variables in a session:
> ls() [1] "f" "x" "y" "z" > rm(list=ls()) > ls() character(0)
Vector
> c(1,1,2,3,5,8,13,21) [1] 1 1 2 3 5 8 13 21 > c(1*pi, 2*pi, 3*pi, 4*pi) [1] 3.141593 6.283185 9.424778 12.566371 > c("Everyone", "loves", "stats.") [1] "Everyone" "loves" "stats." > c(TRUE,TRUE,FALSE,TRUE) [1] TRUE TRUE FALSE TRUE
If the arguments to c(…) are themselves vectors, it flattens them and combines them into one single vector:
> v1 <- c(1,2,3) > v2 <- c(4,5,6) > c(v1,v2) [1] 1 2 3 4 5 6
> v1 <- c(1,2,3) > v3 <- c("A","B","C") > c(v1,v3) [1] "1" "2" "3" "A" "B" "C"
> c(3.1415, "foo") [1] "3.1415" "foo" > mode(c(3.1415, "foo")) [1] "character"
Basic (descriptive) Statistics
Use the following functions to compute the mean, median, standard deviation, variance, correlation, or covariance:
mean(x) median(x) sd(x) var(x) cor(x, y) cov(x, y)
The variables x and y must be numeric (see level of measurement).
> x <- c(0,1,1,2,3,5,8,13,21,34) > mean(x) [1] 8.8 > median(x) [1] 4 > sd(x) [1] 11.03328 > var(x) [1] 121.7333 > x <- c(0,1,1,2,3,5,8,13,21,34) > y <- log(x+1) > cor(x,y) [1] 0.9068053 > cov(x,y) [1] 11.49988
> x <- c(0,1,1,2,3,NA) > mean(x) [1] NA > sd(x) [1] NA > x <- c(0,1,1,2,3,NA) > mean(x, na.rm=TRUE) [1] 1.4 > sd(x, na.rm=TRUE) [1] 1.140175
data
small <- c(0.6739635, 1.5524619, 0.3250562, 1.2143595, 1.3107692, 2.1739663, 1.6187899, 0.8872657, 1.9170283, 0.7767406) medium <- c(10.526448, 9.205156, 11.427756, 8.53318, 9.763317, 9.806662, 9.150245, 10.058465, 9.18233, 7.949692) big <- c(99.83624, 100.70852, 99.73202, 98.53608, 100.74444, 98.58961, 100.46707, 99.88068, 100.46724, 100.49814) dframe <- data.frame(small, medium, big)
> print(dframe) small medium big 1 0.6739635 10.526448 99.83624 2 1.5524619 9.205156 100.70852 3 0.3250562 11.427756 99.73202 4 1.2143595 8.533180 98.53608 5 1.3107692 9.763317 100.74444 6 2.1739663 9.806662 98.58961 7 1.6187899 9.150245 100.46707 8 0.8872657 10.058465 99.88068 9 1.9170283 9.182330 100.46724 10 0.7767406 7.949692 100.49814 > mean(dframe) small medium big 1.245040 9.560325 99.946003 > sd(dframe) small medium big 0.5844025 0.9920281 0.8135498
Note: the output above comes from an older version of R. Since R 3.0.0, mean() and sd() no longer operate column-wise on a data frame; use colMeans(dframe) and sapply(dframe, sd) to obtain the same results.
> var(dframe) small medium big small 0.34152627 -0.21516416 -0.04005275 medium -0.21516416 0.98411974 -0.09253855 big -0.04005275 -0.09253855 0.66186326 > cor(dframe) small medium big small 1.00000000 -0.3711367 -0.08424345 medium -0.37113670 1.0000000 -0.11466070 big -0.08424345 -0.1146607 1.00000000 > cov(dframe) small medium big small 0.34152627 -0.21516416 -0.04005275 medium -0.21516416 0.98411974 -0.09253855 big -0.04005275 -0.09253855 0.66186326
Sequence
> 1:5 [1] 1 2 3 4 5 > seq(from=1, to=5, by=2) [1] 1 3 5 > rep(1, times=5) [1] 1 1 1 1 1 > seq(from=0, to=20, length.out=5) [1] 0 5 10 15 20 > seq(from=0, to=100, length.out=5) [1] 0 25 50 75 100
Comparing Vectors
> a <- 3 > a == pi # Test for equality [1] FALSE > a != pi # Test for inequality [1] TRUE > a < pi [1] TRUE > a > pi [1] FALSE > a <= pi [1] TRUE > a >= pi [1] FALSE
> v <- c( 3, pi, 4) > w <- c(pi, pi, pi) > v == w # Compare two 3-element vectors [1] FALSE TRUE FALSE # Result is a 3-element vector > v != w [1] TRUE FALSE TRUE > v < w [1] TRUE FALSE FALSE > v <= w [1] TRUE TRUE FALSE > v > w [1] FALSE FALSE TRUE > v >= w [1] FALSE TRUE TRUE > v <- c(3, pi, 4) > v == pi # Compare a 3-element vector against one number [1] FALSE TRUE FALSE > v != pi [1] TRUE FALSE TRUE . . .
> v <- c(3, pi, 4) > any(v == pi) # Return TRUE if any element of v equals pi [1] TRUE > all(v == 0) # Return TRUE if all elements of v are zero [1] FALSE
Selecting Vector Elements
> fib <- c(0,1,1,2,3,5,8,13,21,34) > fib [1] 0 1 1 2 3 5 8 13 21 34 > fib[1] [1] 0 > fib[2] [1] 1 > fib[3] [1] 1 > fib[4] [1] 2 > fib[5] [1] 3 > fib[1:3] # Select elements 1 through 3 [1] 0 1 1 > fib[4:9] # Select elements 4 through 9 [1] 2 3 5 8 13 21 > fib[c(1,2,4,8)] [1] 0 1 2 13 > fib[-1] # Ignore first element [1] 1 1 2 3 5 8 13 21 34 > fib[1:3] # As before [1] 0 1 1 > fib[-(1:3)] # Invert sign of index to exclude instead of select [1] 2 3 5 8 13 21 34
> fib < 10 # This vector is TRUE wherever fib is less than 10 [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE > fib[fib < 10] # Use that vector to select elements less than 10 [1] 0 1 1 2 3 5 8 > fib %% 2 == 0 # This vector is TRUE wherever fib is even [1] TRUE FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE TRUE > fib[fib %% 2 == 0] # Use that vector to select the even elements [1] 0 2 8 34
Select all elements greater than the median: v[ v > median(v) ] Select all elements in the lower and upper 5%: v[ (v < quantile(v,0.05)) | (v > quantile(v,0.95)) ] Select all elements that exceed ±2 standard deviations from the mean: v[ abs(v-mean(v)) > 2*sd(v) ] Select all elements that are neither NA nor NULL: v[ !is.na(v) & !is.null(v) ]
> years <- c(1960, 1964, 1976, 1994) > names(years) <- c("Kennedy", "Johnson", "Carter", "Clinton") > years Kennedy Johnson Carter Clinton 1960 1964 1976 1994 > years["Carter"] Carter 1976 > years["Clinton"] Clinton 1994 > years[c("Carter","Clinton")] Carter Clinton 1976 1994
Performing Vector Arithmetic
> v <- c(11,12,13,14,15) > w <- c(1,2,3,4,5) > v + w [1] 12 14 16 18 20 > v - w [1] 10 10 10 10 10 > v * w [1] 11 24 39 56 75 > v / w [1] 11.000000 6.000000 4.333333 3.500000 3.000000 > w ^ v [1] 1 4096 1594323 268435456 30517578125
> w [1] 1 2 3 4 5 > mean(w) [1] 3 > w - mean(w) [1] -2 -1 0 1 2 > w [1] 1 2 3 4 5 > sd(w) [1] 1.581139 > (w - mean(w)) / sd(w) [1] -1.2649111 -0.6324555 0.0000000 0.6324555 1.2649111
> w [1] 1 2 3 4 5 > sqrt(w) [1] 1.000000 1.414214 1.732051 2.000000 2.236068 > log(w) [1] 0.0000000 0.6931472 1.0986123 1.3862944 1.6094379 > sin(w) [1] 0.8414710 0.9092974 0.1411200 -0.7568025 -0.9589243
Operator | Meaning | See also |
---|---|---|
[ [[ | Indexing | Recipe 2.9 |
:: ::: | Access variables in a name space | |
$ @ | Component extraction, slot extraction | |
^ | Exponentiation (right to left) | |
- + | Unary minus and plus | |
: | Sequence creation | Recipe 2.7, Recipe 7.14 |
%any% | Special operators | Discussion |
* / | Multiplication, division | Discussion |
+ - | Addition, subtraction | |
== != < > <= >= | Comparison | Recipe 2.8 |
! | Logical negation | |
& && | Logical “and”, short-circuit “and” | |
\| \|\| | Logical “or”, short-circuit “or” | |
~ | Formula | Recipe 11.1 |
-> ->> | Rightward assignment | Recipe 2.2 |
= | Assignment (right to left) | Recipe 2.2 |
<- <<- | Assignment (right to left) | Recipe 2.2 |
? | Help | Recipe 1.7 |
%% : Modulo operator
%/% : Integer division
%*% : Matrix multiplication
%in% : Returns TRUE if the left operand occurs in its right operand; FALSE otherwise
Defining a Function
function(param1, ..., paramN) { expr1 . . . exprM }
> cv <- function(x) sd(x)/mean(x) > cv(1:10) [1] 0.5504819 > cv <- function(x) sd(x)/mean(x) > lapply(lst, cv) > gcd <- function(a,b) { + if (b == 0) return(a) + else return(gcd(b, a %% b)) + }