Using R as a calculator:
2*8+3
## [1] 19
2^3*5-3
## [1] 37
a <- 2^3*5-3
a-6
## [1] 31
Using print and paste:
print("It's a good day!")
## [1] "It's a good day!"
print(paste("I am", a, "years old."))
## [1] "I am 37 years old."
print(paste("I am", a, "years old.", sep="-"))
## [1] "I am-37-years old."
Getting help from R:
help(read.table)
?read.table
Creating vectors:
weight.1 <- c(112,106,121,119)
weight.2 <- c(122,136,133,140)
weight.2-weight.1
## [1] 10 30 12 21
Testing, and logical variables:
fruits <- c("Apple","Orange","Carrot")
fruits=="Apple"
## [1] TRUE FALSE FALSE
isApple <- fruits=="Apple"
isApple
## [1] TRUE FALSE FALSE
Factors represent categorical variables. They are needed to distinguish categorical variables from numerical ones:
pain <- c(0,3,2,2,1)
pain
## [1] 0 3 2 2 1
levels(pain)
## NULL
levels(as.factor(pain))
## [1] "0" "1" "2" "3"
# Build a real factor: map the codes 0..3 to descriptive labels.
# (Assigning levels() to a plain numeric vector would only attach a
# "levels" attribute; the vector would stay numeric.)
pain <- factor(pain, levels=0:3, labels=c("none","mild","medium","severe"))
levels(pain)
## [1] "none" "mild" "medium" "severe"
pain
## [1] none severe medium medium mild
## Levels: none mild medium severe
Sequences:
4:9
## [1] 4 5 6 7 8 9
seq(4,9)
## [1] 4 5 6 7 8 9
seq(4,9,1)
## [1] 4 5 6 7 8 9
seq(4,10,2)
## [1] 4 6 8 10
Replicates:
oops <- c(7,9,13)
rep(oops,3)
## [1] 7 9 13 7 9 13 7 9 13
rep(oops,1:3)
## [1] 7 9 9 13 13 13
rep(c("male","female"),c(4,6))
## [1] "male" "male" "male" "male" "female" "female" "female"
## [8] "female" "female" "female"
rep(c("male","female"),each=5)
## [1] "male" "male" "male" "male" "male" "female" "female"
## [8] "female" "female" "female"
Lists are useful for combining different collections of objects.
intake.pre <- c(5260,5470,5640,6180,6390,6515,6805,7515,7515,8230,8770)
intake.post <- c(3910,4220,3885,5160,5645,4680,5265,5975,6790,6900,7335)
mylist <- list(before=intake.pre, after=intake.post)
mylist
## $before
## [1] 5260 5470 5640 6180 6390 6515 6805 7515 7515 8230 8770
##
## $after
## [1] 3910 4220 3885 5160 5645 4680 5265 5975 6790 6900 7335
mylist$after
## [1] 3910 4220 3885 5160 5645 4680 5265 5975 6790 6900 7335
Matrices:
x <- 1:12
dim(x) <- c(3,4)
x
## [,1] [,2] [,3] [,4]
## [1,] 1 4 7 10
## [2,] 2 5 8 11
## [3,] 3 6 9 12
y <- matrix(1:12,nrow=3,byrow=TRUE)
y
## [,1] [,2] [,3] [,4]
## [1,] 1 2 3 4
## [2,] 5 6 7 8
## [3,] 9 10 11 12
dim(x)
## [1] 3 4
A data frame is the most useful way to deal with table-like data. Creating a data frame:
# Nine pigs with their pre- and post-weaning values; paste0() builds the
# ids "pig1" ... "pig9" without a separator.
testdata <- data.frame(id=paste0("pig",1:9),
                       pre.weaning=101:109,
                       post.weaning=209:201)
testdata
## id pre.weaning post.weaning
## 1 pig1 101 209
## 2 pig2 102 208
## 3 pig3 103 207
## 4 pig4 104 206
## 5 pig5 105 205
## 6 pig6 106 204
## 7 pig7 107 203
## 8 pig8 108 202
## 9 pig9 109 201
testdata$id
## [1] pig1 pig2 pig3 pig4 pig5 pig6 pig7 pig8 pig9
## Levels: pig1 pig2 pig3 pig4 pig5 pig6 pig7 pig8 pig9
testdata$post.weaning
## [1] 209 208 207 206 205 204 203 202 201
testdata$post.weaning[2]
## [1] 208
Slicing:
test <- data.frame(x=1:5,y=6:10,z=11:15)
test
## x y z
## 1 1 6 11
## 2 2 7 12
## 3 3 8 13
## 4 4 9 14
## 5 5 10 15
test[,2]
## [1] 6 7 8 9 10
test[2,]
## x y z
## 2 2 7 12
test[2,2]
## [1] 7
test[2,c(1,3)]
## x z
## 2 2 12
Indexing:
colnames(test) <- c("xx","yy","zz")
rownames(test) <- c("a","b","c","d","e")
test
## xx yy zz
## a 1 6 11
## b 2 7 12
## c 3 8 13
## d 4 9 14
## e 5 10 15
test[,"yy"]
## [1] 6 7 8 9 10
test["b",]
## xx yy zz
## b 2 7 12
test["b","yy"]
## [1] 7
test["b",c("xx","zz")]
## xx zz
## b 2 12
subset(test, select=xx)
## xx
## a 1
## b 2
## c 3
## d 4
## e 5
subset(test, select=-xx)
## yy zz
## a 6 11
## b 7 12
## c 8 13
## d 9 14
## e 10 15
Slicing & indexing:
test[1:3,c("xx","zz")]
## xx zz
## a 1 11
## b 2 12
## c 3 13
Dropping data:
test
## xx yy zz
## a 1 6 11
## b 2 7 12
## c 3 8 13
## d 4 9 14
## e 5 10 15
test[,-1]
## yy zz
## a 6 11
## b 7 12
## c 8 13
## d 9 14
## e 10 15
test[-1,]
## xx yy zz
## b 2 7 12
## c 3 8 13
## d 4 9 14
## e 5 10 15
subset(test,select=-xx)
## yy zz
## a 6 11
## b 7 12
## c 8 13
## d 9 14
## e 10 15
Conditional selection:
test
## xx yy zz
## a 1 6 11
## b 2 7 12
## c 3 8 13
## d 4 9 14
## e 5 10 15
test[test$xx!=3,]
## xx yy zz
## a 1 6 11
## b 2 7 12
## d 4 9 14
## e 5 10 15
test[test$xx>3,]
## xx yy zz
## d 4 9 14
## e 5 10 15
test[test$xx>=3 & test$zz<=14,]
## xx yy zz
## c 3 8 13
## d 4 9 14
test[test$xx>3 | test$zz<12,]
## xx yy zz
## a 1 6 11
## d 4 9 14
## e 5 10 15
Read data file:
To read a space-separated file like data1.txt, with a header row (depending on the file you are reading, the options can be different):
data1 <- read.table(file.choose(), header=TRUE, colClasses=c("character","character","numeric","numeric"))
Get some information about the file:
colnames(data1)
## [1] "ID" "proteinl" "birthw" "weanw"
head(data1)
## ID proteinl birthw weanw
## 1 anim1 high 6.1 23.1
## 2 anim2 high 4.8 16.2
## 3 anim3 high 5.2 16.1
## 4 anim4 high 5.4 15.3
## 5 anim5 high 4.4 18.6
## 6 anim6 high 6.1 23.7
head(data1,10)
## ID proteinl birthw weanw
## 1 anim1 high 6.1 23.1
## 2 anim2 high 4.8 16.2
## 3 anim3 high 5.2 16.1
## 4 anim4 high 5.4 15.3
## 5 anim5 high 4.4 18.6
## 6 anim6 high 6.1 23.7
## 7 anim7 high 7.3 21.6
## 8 anim8 high 4.1 17.8
## 9 anim9 high 5.1 23.1
## 10 anim10 high 4.8 18.2
tail(data1)
## ID proteinl birthw weanw
## 495 anim495 low 5.2 16.0
## 496 anim496 low 3.7 12.0
## 497 anim497 low 5.1 15.0
## 498 anim498 low 5.0 13.0
## 499 anim499 low 4.4 13.9
## 500 anim500 low 6.1 15.9
dim(data1)
## [1] 500 4
ncol(data1)
## [1] 4
nrow(data1)
## [1] 500
summary(data1)
## ID proteinl birthw weanw
## Length:500 Length:500 Min. :2.500 Min. :11.20
## Class :character Class :character 1st Qu.:4.300 1st Qu.:14.90
## Mode :character Mode :character Median :5.000 Median :16.60
## Mean :4.999 Mean :17.12
## 3rd Qu.:5.700 3rd Qu.:19.10
## Max. :7.500 Max. :25.00
For the solutions to the homework, check the presentation file.
Write data file:
Let's say we want to write the first 100 rows of data1 to a space-separated file, data1small.txt, with a header row, no quotes, and no row names (depending on the file you want to write, the options can be different):
write.table(data1[1:100,], file="data1small.txt", quote=FALSE, row.names=FALSE)
getwd()
You can write notes and outputs to an external file. Consider the following example:
# The first call creates (or overwrites) log.txt; the next two append to it.
# The text is passed directly to write(): wrapping it in print() is not
# needed and would also echo every line to the console.
write("This is my log file.", file="log.txt")
write(paste("Birth weight average =", mean(data1$birthw)), file="log.txt", append=TRUE)
write(paste("Weaning weight average =", mean(data1$weanw)), file="log.txt", append=TRUE)
In log.txt, you would have:
This is my log file.
Birth weight average = 4.9986
Weaning weight average = 17.1162
To view a data frame in RStudio, either click on it in the Environment window or run:
View(data1)
To start editing the data manually:
# edit() works on a copy and returns the modified object, so assign the
# result back; otherwise the manual changes are lost.
data1 <- edit(data1)
You can get many descriptive statistics from your data. For example:
mean(data1$birthw)
## [1] 4.9986
cor(data1$birthw, data1$weanw)
## [1] 0.4209551
Merging two data frames (different options are used; check out the differences):
df1 <- data.frame(anim=paste0('anim',1:9), food=19:11)
df2 <- data.frame(anim=paste0('anim',2:10),water=22:30)
df1
## anim food
## 1 anim1 19
## 2 anim2 18
## 3 anim3 17
## 4 anim4 16
## 5 anim5 15
## 6 anim6 14
## 7 anim7 13
## 8 anim8 12
## 9 anim9 11
df2
## anim water
## 1 anim2 22
## 2 anim3 23
## 3 anim4 24
## 4 anim5 25
## 5 anim6 26
## 6 anim7 27
## 7 anim8 28
## 8 anim9 29
## 9 anim10 30
df3 <- merge(df1, df2, by="anim")
df3
## anim food water
## 1 anim2 18 22
## 2 anim3 17 23
## 3 anim4 16 24
## 4 anim5 15 25
## 5 anim6 14 26
## 6 anim7 13 27
## 7 anim8 12 28
## 8 anim9 11 29
df3 <- merge(df1, df2, by="anim", all.x=TRUE)
df3
## anim food water
## 1 anim1 19 NA
## 2 anim2 18 22
## 3 anim3 17 23
## 4 anim4 16 24
## 5 anim5 15 25
## 6 anim6 14 26
## 7 anim7 13 27
## 8 anim8 12 28
## 9 anim9 11 29
df3 <- merge(df1, df2, by="anim", all.y=TRUE)
df3
## anim food water
## 1 anim2 18 22
## 2 anim3 17 23
## 3 anim4 16 24
## 4 anim5 15 25
## 5 anim6 14 26
## 6 anim7 13 27
## 7 anim8 12 28
## 8 anim9 11 29
## 9 anim10 NA 30
What if the "by" column has different names in the two data sets?
colnames(df2)[1] <- "animal"
colnames(df1)
## [1] "anim" "food"
colnames(df2)
## [1] "animal" "water"
df3 <- merge(df1, df2, by.x="anim", by.y="animal")
df3
## anim food water
## 1 anim2 18 22
## 2 anim3 17 23
## 3 anim4 16 24
## 4 anim5 15 25
## 5 anim6 14 26
## 6 anim7 13 27
## 7 anim8 12 28
## 8 anim9 11 29
cbind
To use cbind, the data frames must have the same number of rows.
df1
## anim food
## 1 anim1 19
## 2 anim2 18
## 3 anim3 17
## 4 anim4 16
## 5 anim5 15
## 6 anim6 14
## 7 anim7 13
## 8 anim8 12
## 9 anim9 11
df2
## animal water
## 1 anim2 22
## 2 anim3 23
## 3 anim4 24
## 4 anim5 25
## 5 anim6 26
## 6 anim7 27
## 7 anim8 28
## 8 anim9 29
## 9 anim10 30
df3 <- cbind(df1, df2)
df3
## anim food animal water
## 1 anim1 19 anim2 22
## 2 anim2 18 anim3 23
## 3 anim3 17 anim4 24
## 4 anim4 16 anim5 25
## 5 anim5 15 anim6 26
## 6 anim6 14 anim7 27
## 7 anim7 13 anim8 28
## 8 anim8 12 anim9 29
## 9 anim9 11 anim10 30
rbind
To use rbind, the data frames must have the same number of columns and the same column names.
colnames(df1) <- c("anim","food_water")
colnames(df2) <- c("anim","food_water")
df3 <- rbind(df1, df2)
df3
## anim food_water
## 1 anim1 19
## 2 anim2 18
## 3 anim3 17
## 4 anim4 16
## 5 anim5 15
## 6 anim6 14
## 7 anim7 13
## 8 anim8 12
## 9 anim9 11
## 10 anim2 22
## 11 anim3 23
## 12 anim4 24
## 13 anim5 25
## 14 anim6 26
## 15 anim7 27
## 16 anim8 28
## 17 anim9 29
## 18 anim10 30
sort
sort is not made for data frames! It is for vectors or a single column of a data frame.
df1 <- data.frame(anim=paste0('anim',1:9), food=19:11)
df2 <- data.frame(anim=paste0('anim',2:10),water=22:30)
df3 <- merge(df1, df2, all.x=TRUE)
df3
## anim food water
## 1 anim1 19 NA
## 2 anim2 18 22
## 3 anim3 17 23
## 4 anim4 16 24
## 5 anim5 15 25
## 6 anim6 14 26
## 7 anim7 13 27
## 8 anim8 12 28
## 9 anim9 11 29
sort(df3$food)
## [1] 11 12 13 14 15 16 17 18 19
sort(df3$food, decreasing=TRUE)
## [1] 19 18 17 16 15 14 13 12 11
order
Example of the order command:
df3[order(df3$food),]
## anim food water
## 9 anim9 11 29
## 8 anim8 12 28
## 7 anim7 13 27
## 6 anim6 14 26
## 5 anim5 15 25
## 4 anim4 16 24
## 3 anim3 17 23
## 2 anim2 18 22
## 1 anim1 19 NA
df3[order(-df3$food),]
## anim food water
## 1 anim1 19 NA
## 2 anim2 18 22
## 3 anim3 17 23
## 4 anim4 16 24
## 5 anim5 15 25
## 6 anim6 14 26
## 7 anim7 13 27
## 8 anim8 12 28
## 9 anim9 11 29
df3$group <- rep(1:3,3)
df3
## anim food water group
## 1 anim1 19 NA 1
## 2 anim2 18 22 2
## 3 anim3 17 23 3
## 4 anim4 16 24 1
## 5 anim5 15 25 2
## 6 anim6 14 26 3
## 7 anim7 13 27 1
## 8 anim8 12 28 2
## 9 anim9 11 29 3
df3[order(-df3$group, df3$food),]
## anim food water group
## 9 anim9 11 29 3
## 6 anim6 14 26 3
## 3 anim3 17 23 3
## 8 anim8 12 28 2
## 5 anim5 15 25 2
## 2 anim2 18 22 2
## 7 anim7 13 27 1
## 4 anim4 16 24 1
## 1 anim1 19 NA 1
Matrix operations (not presented, but slides are available for those who are interested).