Introduction to R 2010

From BioDivBorneo2010

Jump to: navigation, search

Basic Intro

# Open R's HTML help pages in a web browser.
help.start()
# Assign with <- and type a name on its own to print its value.
a <- 1
a
# c() combines values into a vector.
b <- c(1, 2, 3, 4)
b
d <- c(2, 3, 4, 5)
d
# Stack vectors into a matrix, either as rows or as columns.
rbind(b, d)
cbind(b, d)
# Square brackets pick out elements; the first element is number 1.
d[2]
rbind(b, d)
e <- rbind(b, d)
e
# A matrix is indexed as [row, column].
e[1, 3]
# ls() lists the objects created so far.
ls()
# Leaving the row index blank selects every row of that column.
e[, 2]
b
# The colon operator builds a sequence of consecutive integers ...
2:4
100:105
d
# ... which can be used as an index to extract a range of elements.
d[2:4]
d
# A negative index drops that element instead of selecting it.
d[-4]
# Two vectors of the same length: a 0/1 indicator and some measurements.
x <- c(1, 1, 0, 0, 1, 1, 1)
x
y <- c(12, 24, 6, 4, 13, 24, 35)
y
# A comparison returns one TRUE/FALSE per element ...
y > 10
# ... and indexing with that logical vector keeps only the TRUE positions.
y[y > 10]
x
y
# The condition may be computed from a different vector of the same length.
y[x > 0]
y[x == 1]
z <- c("a", "a", "a", "b", "c", "a", "b")
z
y[z == "b"]
# rbind() makes a matrix, which holds a single type, so the numbers in y
# are converted to character strings.
rbind(y, z)
# A data frame lets each column keep its own type.
data.frame(y, z)
q <- data.frame(y, z)
q
# $ extracts a column by name; the result is an ordinary vector.
q$y
q$y[2]

With OpenOffice Calc, make a simple data file. Save it as Text CSV, ticking the box to edit the filter settings. Select the | character as the field delimiter and clear the text delimiter box (delete the " character from it). Open the resulting file in a text editor. You should see:

gap|killer|ht
n|b|1
n|b|2
n|b|3
y|b|4
y|b|5
y|b|6
n|a|7
n|a|8
n|a|9
y|a|12
y|a|13
y|a|14

Go back into R:

# Read the pipe-delimited file in stages to see what each argument does.
# NOTE(review): "D/mydata.csv" is resolved relative to the working directory;
# confirm this is the intended location of the file.
# With the default separator (white space) each line is read as one field.
read.table("D/mydata.csv")
# sep = "|" splits the fields, but the header line is still read as data.
read.table("D/mydata.csv", sep = "|")
# header = TRUE takes the column names from the first line. TRUE is spelled
# out because T is an ordinary variable that can be reassigned.
read.table("D/mydata.csv", sep = "|", header = TRUE)
# stringsAsFactors = TRUE reads the text columns (gap, killer) as factors.
# Since R 4.0.0 the default is FALSE, and with a character gap column the
# plot() call below stops with an error instead of drawing a box plot.
data <- read.table("D/mydata.csv", sep = "|", header = TRUE,
                   stringsAsFactors = TRUE)
data
data$ht
mean(data$ht)
# Plotting a numeric variable against a factor draws one box per level.
plot(data$ht ~ data$gap)

# rnorm(n) draws n random values from a normal distribution (mean 0 and
# sd 1 unless given), so the numbers differ on every call.
rnorm(100)
# hist() shows the shape of a sample; a smaller sd gives a narrower spread.
hist(rnorm(100))
hist(rnorm(1000, mean = 50, sd = 6))
hist(rnorm(1000, mean = 50, sd = 1))
rnorm(100, mean = 45, sd = 5)
# Two samples whose true means differ by 10, compared with a t-test.
s1 <- rnorm(100, mean = 45, sd = 5)
s2 <- rnorm(100, mean = 55, sd = 5)
t.test(s1, s2)
# Same difference in means, but more spread within each sample.
s1 <- rnorm(100, mean = 45, sd = 15)
s2 <- rnorm(100, mean = 55, sd = 15)
t.test(s1, s2)
# Even more spread: the same difference becomes harder to detect.
s1 <- rnorm(100, mean = 45, sd = 25)
s2 <- rnorm(100, mean = 55, sd = 25)
t.test(s1, s2)

# Back to the data frame read from the file.
data
data$ht
# Heights of the rows where gap is "y", then of the rows where gap is "n".
data$ht[data$gap == "y"]
data$ht[data$gap == "n"]
# Two ways to run the same two-sample t-test: pass the two groups as
# separate vectors, or use a formula "response ~ grouping variable".
t.test(data$ht[data$gap == "n"], data$ht[data$gap == "y"])
t.test(data$ht ~ data$gap)

# lm() fits a linear model; summary() prints coefficients and their tests.
summary(lm(data$ht ~ data$gap))
# Printing the fit directly shows only the call and the coefficients.
lm(data$ht ~ data$gap)
summary(lm(data$ht ~ data$gap))
# Terms joined with + enter the model together as additive predictors.
summary(lm(data$ht ~ data$gap + data$killer))
summary(lm(data$ht ~ data$killer))
summary(lm(data$ht ~ data$gap + data$killer))
# The same formula can be handed to plot() to plot ht against each predictor.
plot(data$ht ~ data$gap + data$killer)

# Build up a labelled histogram: first the defaults, then a title, then an
# x-axis label.
hist(rnorm(1000, mean = 50, sd = 1))
hist(rnorm(1000, mean = 50, sd = 1), main = "My histogram")
hist(rnorm(1000, mean = 50, sd = 1), main = "My histogram",
     xlab = "Height of tree (m)")
# jpeg() opens a JPEG file as the graphics device, so the next plot is
# written to that file instead of the screen. No file name is given here,
# so R uses its default name in the working directory.
jpeg()
hist(rnorm(1000, mean = 50, sd = 1), main = "My histogram",
     xlab = "Height of tree (m)")
# dev.off() closes the device; the file is only complete after this call.
dev.off()
# Quit R.
q()

Analyzing your plot data

# Compare your own girth measurements with the dbh values in the Lambir file.
# The Lambir file is read without a header line; "." marks a missing value.
lambir <- read.table("lambir2912.dat", sep = "|", na.strings = ".")
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
mine <- read.table("mydata.csv", sep = "|", header = TRUE)
# Give both tables matching column names so they can be joined on "tag".
colnames(mine) <- c("tag", "girth", "sp")
colnames(lambir) <- c("tag", "dbh", "sp")
# Diameter = girth / pi. The built-in constant pi replaces the truncated
# value 3.1415, which overestimated every diameter slightly.
mine$dbh <- mine$girth / pi
mine[1:10, ]
lambir[1:10, ]
# merge() keeps the tags present in both tables. Columns that share a name
# get a suffix: .x for the first table (lambir), .y for the second (mine).
new <- merge(lambir, mine, by = "tag")
new[1:10, ]
# Growth is your diameter minus the diameter in the Lambir file.
new$growth <- new$dbh.y - new$dbh.x
new[1:10, ]
# na.rm = TRUE leaves out trees with a missing value in either table.
mean(new$growth, na.rm = TRUE)

# Show the commands typed in this session, then quit R.
history()
q()

Dealing with non-numeric data in columns

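In versions of R before 4.0.0 (or whenever stringsAsFactors=TRUE is set), read.table interprets a column that mixes numeric and character data as a factor, which will prevent you from doing numeric operations on that column, like mean(). You need to convert it to numbers in two stages: first as.character() to recover the original labels, then as.numeric(). Calling as.numeric() directly on a factor returns the internal level codes, not the values: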

# A factor built from text that is mostly numbers, plus one stray "d".
a <- as.factor(c("2.1", "1", "1.2", "3", "d", "5.0"))
a
# Wrong: this returns the internal level codes, not the numbers.
as.numeric(a)
# Stage 1: recover the original labels as character strings.
as.character(a)
# Stage 2: convert the strings to numbers; "d" becomes NA with a warning.
as.numeric(as.character(a))