# Load the `cats` data set that ships with the MASS package and inspect it.
# Lines starting with `#>` are console output recorded from an earlier run;
# they are kept as comments so that the script can be parsed and sourced.
library(MASS)
data(cats, package = "MASS")

# View() opens a spreadsheet-style data viewer, which is only useful in an
# interactive session, so skip it when the script runs non-interactively.
if (interactive()) {
  View(cats)
}

# Structure: 144 observations of one factor (Sex) and two numeric columns
# (Bwt, Hwt).
str(cats)
#> 'data.frame': 144 obs. of 3 variables:
#>  $ Sex: Factor w/ 2 levels "F","M": 1 1 1 1 1 1 1 1 1 1 ...
#>  $ Bwt: num 2 2 2 2.1 2.1 2.1 2.1 2.1 2.1 2.1 ...
#>  $ Hwt: num 7 7.4 9.5 7.2 7.3 7.6 8.1 8.2 8.3 8.5 ...

# Per-column summary: level counts for Sex, five-number summary plus mean for
# the numeric columns.
summary(cats)
#>  Sex         Bwt             Hwt
#>  F:47   Min.   :2.000   Min.   : 6.30
#>  M:97   1st Qu.:2.300   1st Qu.: 8.95
#>         Median :2.700   Median :10.10
#>         Mean   :2.724   Mean   :10.63
#>         3rd Qu.:3.025   3rd Qu.:12.12
#>         Max.   :3.900   Max.   :20.50
# Visual checks of the distribution of cats$Hwt: a histogram and a boxplot.
hist(cats$Hwt)
boxplot(cats$Hwt)
# Q-Q plot of Hwt with a reference line. stat_qq() and stat_qq_line() are
# called with their defaults, which per the ggplot2 documentation compare the
# sample against a normal distribution.
library(ggplot2)
ggplot(cats, aes(sample = Hwt)) + stat_qq() + stat_qq_line()
# Shapiro-Wilk normality test. The null hypothesis is that the sample comes
# from a normal distribution, so a small p-value is evidence against
# normality. Lines starting with `#>` are console output recorded from an
# earlier run, kept as comments so that the script can be parsed and sourced.
?shapiro.test

# NOTE: the three simulated samples below are drawn without a fixed seed, so
# W and the p-value will differ from the recorded values on every run.

# Normal sample, n = 100: recorded p-value is above 0.05, so normality is not
# rejected at the 5% level.
shapiro.test(rnorm(n = 100, mean = 0, sd = 1))
#> Shapiro-Wilk normality test
#> data: rnorm(n = 100, mean = 0, sd = 1)
#> W = 0.97832, p-value = 0.09818

# Uniform sample, n = 100: recorded p-value is below 0.05, so normality is
# rejected at the 5% level.
shapiro.test(runif(100, min = 1, max = 10))
#> Shapiro-Wilk normality test
#> data: runif(100, min = 1, max = 10)
#> W = 0.96753, p-value = 0.01436

# Uniform sample, n = 3: with so few points the recorded p-value is large even
# though the data are not normal.
shapiro.test(runif(3, min = 1, max = 10))
#> Shapiro-Wilk normality test
#> data: runif(3, min = 1, max = 10)
#> W = 0.8844, p-value = 0.3374

# Heart weights from the cats data: recorded p-value is far below 0.05, so
# normality is rejected.
shapiro.test(cats$Hwt)
#> Shapiro-Wilk normality test
#> data: cats$Hwt
#> W = 0.96039, p-value = 0.0003654
# Laptop price data, source: https://www.kaggle.com/ionaskel/laptop-prices
# read.csv() is given a relative path, so "laptops.csv" must be present in the
# current working directory. Lines starting with `#>` are console output
# recorded from an earlier run, kept as comments so that the script can be
# parsed and sourced.
# NOTE(review): the name `df` masks stats::df() in this session; it is kept
# because later code may refer to it.
df <- read.csv("laptops.csv")

# The recorded mean is well above the median, which hints at right skew.
summary(df$Price_euros)
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
#>     174     599     977    1124    1488    6099
boxplot(df$Price_euros)

# Sample skewness from the moments package; the recorded value is positive.
library(moments)
skewness(df$Price_euros)
#> [1] 1.519114

# Shapiro-Wilk test on the prices: the recorded p-value is effectively zero,
# so normality is rejected.
shapiro.test(df$Price_euros)
#> Shapiro-Wilk normality test
#> data: df$Price_euros
#> W = 0.89382, p-value < 2.2e-16