Introduction to R ################# #1. Entering data ################# #(a) using scan('filename') > data<-matrix(scan('A:/remisNH.txt'),byrow=T,ncol=5) Read 105 items > data [,1] [,2] [,3] [,4] [,5] [1,] 1 1 1 10 1 [2,] 2 2 22 7 1 ... [21,] 21 2 8 10 0 #With this approach, you have to define all the variables yourself. #For example, > pair<-data[,1] > pair [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 > rem<-data[,2] > rem [1] 1 2 2 2 2 1 2 2 2 2 2 1 2 2 2 1 1 2 2 2 2 #(b) using read.table > data<-read.table('A:/remisWH.txt', header=T) > attach(data) > names(data) [1] "pair" "rem" "timep" "timet" "statust" > rem [1] 1 2 2 2 2 1 2 2 2 2 2 1 2 2 2 1 1 2 2 2 2 #The advantage is that you do not have to name the columns if the data file #comes with a header line. #(c) entering from the keyboard > pair<-1:21 > pair [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 > rem<-c(1,2,2,2,2,1,2,2,2,2,2,1,2,2,2,1,1,2,2,2,2) > rem [1] 1 2 2 2 2 1 2 2 2 2 2 1 2 2 2 1 1 2 2 2 2 ################################## #2. Data checking and manipulation ################################## #(a) subsets of data > data[1,] pair rem timep timet statust 1 1 1 1 10 1 > data[,2:3] rem timep 1 1 1 2 2 22 ... 21 2 8 > data[1:5,2:3] rem timep 1 1 1 2 2 22 ... 5 2 8 #you can change part of the data > x<-1:5 > x [1] 1 2 3 4 5 > x[3]<-6 > x [1] 1 2 6 4 5 #(b) some simple functions > dim(data) [1] 21 5 > length(rem) [1] 21 > table(rem) rem 1 2 5 16 > min(timet) [1] 6 # also try max, mean, median, var > sd(timet) [1] 9.999524 ############################################# #3. 
Arithmetic, matrix and logical operations ############################################## #arithmetic operations: + - * / ^ > 2+3 [1] 5 > 2^3 [1] 8 #matrix operations: t, %*% > A<-matrix(1:6,ncol=2) > A [,1] [,2] [1,] 1 4 [2,] 2 5 [3,] 3 6 > t(A) #transpose [,1] [,2] [,3] [1,] 1 2 3 [2,] 4 5 6 > B<-cbind(rep(1,3),c(1,0,0)) > B [,1] [,2] [1,] 1 1 [2,] 1 0 [3,] 1 0 > t(A) %*% B [,1] [,2] [1,] 6 1 [2,] 15 4 #logical operations: >,<,==,!=,<=,>=,!, &, | > (7>5) [1] TRUE > !(7>5) [1] FALSE > (1:10>=5) [1] FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE > sum(1:10>=5) [1] 6 > sum(c(1:3,2:5)==3) [1] 2 > x<-1:10 > (x>=5 & x<7) [1] FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE FALSE FALSE > (x>=5 | x==2) [1] FALSE TRUE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE ############################################################################ #4. cumulative probability, percentile and random variates for distributions ############################################################################ > qnorm(.975) [1] 1.959964 > pnorm(1) [1] 0.8413447 > rnorm(10) [1] 0.9594821 -1.0000935 0.1733313 -0.2323405 -0.1728959 0.4997507 [7] 0.2924879 -0.1303746 0.7994302 0.4397668 #p,q and r can be added to such distributions as unif, exp, t, chisq, f, gamma, # weibull, etc. If you are not sure about the usage, check the help files ####################### #5. 
making figures in R ####################### #This is arguably the best feature of Splus/R compared to other stat packages plot(x,y) hist(x) abline(0,1) ##################################### #Appendix: some very useful functions ##################################### #rep > rep(3,10) [1] 3 3 3 3 3 3 3 3 3 3 > rep(1:2,5) [1] 1 2 1 2 1 2 1 2 1 2 > rep(1:2,c(3,4)) [1] 1 1 1 2 2 2 2 #cbind, rbind, apply, ifelse #these are very useful for avoiding loops > x<-1:4 > y<-11:14 > cbind(x,y) x y [1,] 1 11 [2,] 2 12 [3,] 3 13 [4,] 4 14 > rbind(x,y) [,1] [,2] [,3] [,4] x 1 2 3 4 y 11 12 13 14 > A<-rbind(x,y) > apply(A,1,sum) # 1 for row operations x y 10 50 > apply(A,2,sum) # 2 for column operations [1] 12 14 16 18 > x<-c(1,5,2,99,4,6,3,99,2) > x<-ifelse(x!=99,x,NA) > x [1] 1 5 2 NA 4 6 3 NA 2 #floor,ceiling,round > floor(3.4) [1] 3 > ceiling(3.4) [1] 4 > round(3.14159,2) [1] 3.14 #unique,sort,order,duplicated > x<-c(1,3,5,3,2,1,6) > x [1] 1 3 5 3 2 1 6 > unique(x) [1] 1 3 5 2 6 > sort(x) [1] 1 1 2 3 3 5 6 > duplicated(x) [1] FALSE FALSE FALSE TRUE FALSE TRUE FALSE > order(x) [1] 1 6 5 2 4 3 7 #using order to sort a matrix according to one of the columns > A<-cbind(x,c(7:10,1:3)) > A x [1,] 1 7 [2,] 3 8 [3,] 5 9 [4,] 3 10 [5,] 2 1 [6,] 1 2 [7,] 6 3 > ord<-order(x) > A[ord,] x [1,] 1 7 [2,] 1 2 [3,] 2 1 [4,] 3 8 [5,] 3 10 [6,] 5 9 [7,] 6 3