# Data source:
# https://github.com/bioinformatics-core-shared-training/linear-models-r/blob/master/data/amess.csv
# The script reads a local copy named "amess.csv" from the working directory.

# Load the data set: a numeric response (folate) and a treatment code
# (treatmnt), as shown by the summary below.
df <- read.csv("amess.csv")

# View() opens a GUI data viewer, so only call it in an interactive session;
# this keeps the script usable with Rscript / source() in batch mode.
if (interactive()) {
  View(df)
}

# At this point treatmnt is still numeric, so summary() reports quantiles
# for it instead of group counts.
summary(df)
#>      folate         treatmnt
#>  Min.   :206.0   Min.   :1.000
#>  1st Qu.:249.5   1st Qu.:1.000
#>  Median :274.0   Median :2.000
#>  Mean   :283.2   Mean   :1.864
#>  3rd Qu.:305.5   3rd Qu.:2.000
#>  Max.   :392.0   Max.   :3.000
# Recode the treatment code as a factor so that it is handled as a grouping
# variable (three categories) rather than as a numeric predictor.
df$treatmnt <- factor(df$treatmnt)

# summary() now reports the number of observations per treatment level.
summary(df)
#>      folate      treatmnt
#>  Min.   :206.0   1:8
#>  1st Qu.:249.5   2:9
#>  Median :274.0   3:5
#>  Mean   :283.2
#>  3rd Qu.:305.5
#>  Max.   :392.0

# Level order matters later: the first level is the reference group under
# the default treatment contrasts.
levels(df$treatmnt)
#> [1] "1" "2" "3"
# Exploratory look at folate by treatment group.
# The formula + data form keeps variable names clean (no `df$` prefixes).

# Strip chart of the raw observations, one colour per treatment level.
stripchart(folate ~ treatmnt, data = df, col = c("blue", "green", "black"))

# Mean folate within each treatment level.
tapply(df$folate, df$treatmnt, mean)
#>        1        2        3
#> 316.6250 256.4444 278.0000

# Box plot of folate for each treatment level.
boxplot(folate ~ treatmnt, data = df)
# Shapiro-Wilk normality test of folate within each treatment level.
# All three recorded p-values are above 0.05, i.e. none of the tests rejects
# normality (note the groups are small: 8, 9 and 5 observations).

shapiro.test(df$folate[df$treatmnt == "1"])
#> Shapiro-Wilk normality test
#> data: df$folate[df$treatmnt == "1"]
#> W = 0.90704, p-value = 0.3337

shapiro.test(df$folate[df$treatmnt == "2"])
#> Shapiro-Wilk normality test
#> data: df$folate[df$treatmnt == "2"]
#> W = 0.9469, p-value = 0.6561

shapiro.test(df$folate[df$treatmnt == "3"])
#> Shapiro-Wilk normality test
#> data: df$folate[df$treatmnt == "3"]
#> W = 0.96355, p-value = 0.8325
# Bartlett test of equal variances across the treatment levels.
# The recorded p-value (0.3508) does not reject homogeneity of variances.
# Output labels below are shown for the `data = df` form of the call.
bartlett.test(folate ~ treatmnt, data = df)
#> Bartlett test of homogeneity of variances
#> data: folate by treatmnt
#> Bartlett's K-squared = 2.0951, df = 2, p-value = 0.3508
# One-way ANOVA of folate on treatment.
# The recorded F test (F = 3.711 on 2 and 19 df, p = 0.0436) is significant
# at the 5% level.
# Output labels below are shown for the `data = df` form of the call.
summary(aov(folate ~ treatmnt, data = df))
#>             Df Sum Sq Mean Sq F value Pr(>F)
#> treatmnt     2  15516    7758   3.711 0.0436 *
#> Residuals   19  39716    2090
#> ---
#> Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Coding currently attached to the factor: treatment (dummy) contrasts with
# level "1" as the reference (its row is all zeros).
contrasts(df$treatmnt)
#>   2 3
#> 1 0 0
#> 2 1 0
#> 3 0 1

# Same model as the ANOVA, fitted with lm().
# With this coding the intercept (316.62) is the mean of level 1, and the
# other coefficients are differences from it:
#   treatmnt2 = 256.44 - 316.62 = -60.18
#   treatmnt3 = 278.00 - 316.62 = -38.62
# Output labels below are shown for the `data = df` form of the call.
summary(lm(folate ~ treatmnt, data = df))
#> Call:
#> lm(formula = folate ~ treatmnt, data = df)
#>
#> Residuals:
#>     Min      1Q  Median      3Q     Max
#> -73.625 -35.361  -4.444  35.625  75.375
#>
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)
#> (Intercept)   316.62      16.16  19.588 4.65e-14 ***
#> treatmnt2     -60.18      22.22  -2.709   0.0139 *
#> treatmnt3     -38.62      26.06  -1.482   0.1548
#> ---
#> Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#>
#> Residual standard error: 45.72 on 19 degrees of freedom
#> Multiple R-squared: 0.2809, Adjusted R-squared: 0.2052
#> F-statistic: 3.711 on 2 and 19 DF, p-value: 0.04359
# Mean folate per treatment level, computed once and reused below.
group_means <- tapply(df$folate, df$treatmnt, mean)
group_means
#>        1        2        3
#> 316.6250 256.4444 278.0000

# Unweighted mean of the three group means. Because the groups have unequal
# sizes (8, 9, 5), this differs from the overall mean of folate (283.2).
mean(group_means)
#> [1] 283.6898
# Switch the factor to sum-to-zero (deviation) contrasts. This modifies df,
# so every model fitted afterwards uses this coding.
contrasts(df$treatmnt) <- contr.sum
contrasts(df$treatmnt)
#>   [,1] [,2]
#> 1    1    0
#> 2    0    1
#> 3   -1   -1

# Refit the same model. The fit itself is unchanged (same residuals, R-squared
# and F statistic as with treatment contrasts); only the meaning of the
# coefficients changes:
#   (Intercept) = 283.69, the unweighted mean of the three group means
#   treatmnt1   = 316.62 - 283.69 =  32.94  (level 1 minus that mean)
#   treatmnt2   = 256.44 - 283.69 = -27.25  (level 2 minus that mean)
# The deviation for level 3 is not printed; it is -(32.94 - 27.25) = -5.69.
# Output labels below are shown for the `data = df` form of the call.
summary(lm(folate ~ treatmnt, data = df))
#> Call:
#> lm(formula = folate ~ treatmnt, data = df)
#>
#> Residuals:
#>     Min      1Q  Median      3Q     Max
#> -73.625 -35.361  -4.444  35.625  75.375
#>
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)
#> (Intercept)   283.69      10.06  28.188   <2e-16 ***
#> treatmnt1      32.94      13.73   2.400   0.0268 *
#> treatmnt2     -27.25      13.37  -2.038   0.0557 .
#> ---
#> Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#>
#> Residual standard error: 45.72 on 19 degrees of freedom
#> Multiple R-squared: 0.2809, Adjusted R-squared: 0.2052
#> F-statistic: 3.711 on 2 and 19 DF, p-value: 0.04359