-
Notifications
You must be signed in to change notification settings - Fork 0
/
R_code_of_the_statistical_analysis.r
74 lines (40 loc) · 2.75 KB
/
R_code_of_the_statistical_analysis.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# Session setup ----
# Print the R version, platform and attached packages for the record.
sessionInfo()

# Pin package installation to the CRAN state of 2018-09-14 so the analysis
# can be reproduced with the package versions originally used.
# The MRAN snapshot server (mran.revolutionanalytics.com) that this script
# originally pointed at was retired in 2023; the same dated snapshot is
# served by Posit Public Package Manager.
options(repos = c(CRAN = "https://packagemanager.posit.co/cran/2018-09-14"))
getOption("repos")

# dplyr: data-manipulation verbs and the %>% pipe used throughout the script.
#install.packages("dplyr")
library(dplyr)
packageVersion("dplyr")

# prettyR: describe() is used for the descriptive-statistics tables.
#install.packages("prettyR")
library(prettyR)
packageVersion("prettyR")
# Load the extracted NHANES data ----
nhanes_extracted <- read.csv(file = "NHANES_extracted.csv")
head(nhanes_extracted)

# Discard the first column of the CSV (presumably an exported row counter --
# TODO confirm against the extraction step), then inspect the structure.
nhanes_extracted <- nhanes_extracted[, -1, drop = FALSE]
glimpse(nhanes_extracted)

# Replace the raw column names with descriptive ones, in column order.
variables <- c(
  "index",
  "female",
  "male",
  "age",
  "body_mass_index",
  "waist_circumference",
  "sugarfree_diet"
)
colnames(nhanes_extracted) <- variables
head(nhanes_extracted)

# Turn the 0/1 diet indicator into a labelled factor (0 = No, 1 = Yes).
nhanes_extracted$sugarfree_diet <- factor(
  nhanes_extracted$sugarfree_diet,
  levels = c("0", "1"),
  labels = c("No", "Yes")
)
# Recode the two sex indicator columns into a single factor ----
# Cross-tabulate the indicators (including missing values) to check how they
# relate before collapsing them.
table(
  nhanes_extracted$female,
  nhanes_extracted$male,
  deparse.level = 2,
  useNA = "always"
)

# female == 1 becomes "female"; every other non-missing value becomes "male".
# The two indicator columns are dropped once `sex` exists.
nhanes_extracted <- nhanes_extracted %>%
  mutate(sex = as.factor(ifelse(female == 1, "female", "male"))) %>%
  select(-c("female", "male"))
head(nhanes_extracted)

# Descriptive statistics for every column except the first (`index`).
describe(
  nhanes_extracted[, -1],
  num.desc = c("mean", "sd", "median", "min", "max", "valid.n")
)
# Gender distribution: counts, percentages and pie chart ----
gender_distribution <- table(nhanes_extracted$sex)
gender_distribution

# Share of each sex in percent, rounded to one decimal place.
gender_proportion <- round(100 * prop.table(gender_distribution), 1)
gender_proportion

# Slice labels such as "51.2%".
gender_percent <- paste0(gender_proportion, "%")

# One colour per factor level, in level order (female, male).
sex_colours <- c("pink", "blue")
pie(
  gender_distribution,
  labels = gender_percent,
  main = "Gender distribution",
  col = sex_colours
)
legend("topright", c("female", "male"), fill = sex_colours)
# Age by sex: quartiles and boxplot ----
# 25th percentile, median and 75th percentile of age per sex, ignoring
# missing ages.
summarise(
  group_by(nhanes_extracted, sex),
  first_quantile_age = quantile(age, probs = 0.25, na.rm = TRUE),
  median_age = median(age, na.rm = TRUE),
  third_quantile_age = quantile(age, probs = 0.75, na.rm = TRUE)
)

boxplot(
  age ~ sex,
  data = nhanes_extracted,
  main = "Boxplot of age distribution by gender",
  xlab = "Gender",
  ylab = "Age in years",
  col = c("pink", "blue")
)
# BMI by sex: quartiles and boxplot ----
# 25th percentile, median and 75th percentile of body mass index per sex,
# ignoring missing values.
nhanes_extracted %>%
  group_by(sex) %>%
  summarise(
    first_quantile_bmi = quantile(body_mass_index, probs = 0.25, na.rm = TRUE),
    median_bmi = median(body_mass_index, na.rm = TRUE),
    third_quantile_bmi = quantile(body_mass_index, probs = 0.75, na.rm = TRUE)
  )

# Plot title spelling corrected ("distibution" -> "distribution").
boxplot(
  body_mass_index ~ sex,
  data = nhanes_extracted,
  main = "Boxplot of BMI distribution by gender",
  xlab = "Gender",
  ylab = "BMI in kg/m^2",
  col = c("pink", "blue")
)
# Restrict the sample to participants aged two years and older ----
# filter() keeps only rows where the condition is TRUE, so rows with a
# missing age are dropped as well.
nhanes_over_two <- filter(nhanes_extracted, age >= 2)

# Quick check of the number of observations and variables.
dim(nhanes_over_two)

# Descriptive statistics for the subset, again leaving out the first column.
describe(
  nhanes_over_two[, -1],
  num.desc = c("mean", "sd", "median", "min", "max", "valid.n")
)
# Waist circumference by sugar-free diet status ----
# Group size and mean waist circumference per diet group. `count` is the
# number of rows in the group; the mean ignores missing measurements.
nhanes_over_two %>%
  group_by(sugarfree_diet) %>%
  summarise(
    count = n(),
    mean = mean(waist_circumference, na.rm = TRUE)
  )

# Number of participants with a missing waist circumference.
sum(is.na(nhanes_over_two$waist_circumference))

# Two-sample Wilcoxon rank-sum test of waist circumference between the diet
# groups; conf.int = TRUE also reports a confidence interval for the
# location shift.
wilcox.test(
  waist_circumference ~ sugarfree_diet,
  data = nhanes_over_two,
  conf.int = TRUE
)