-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_import.R
223 lines (179 loc) · 14.3 KB
/
data_import.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
library(dplyr)
library(ggplot2)
library(lubridate)
################################################################################
################################################################################
################################################################################
### GENTLEMAN 1
# Daily water intake for Gentleman 1, built from the export in
# data/1/water.csv: single intake records are summed per calendar day.
# NOTE(review): `skip = 2` together with `header = TRUE` makes read.csv() use
# the third line of the file as the header row, which is then overwritten by
# the names assigned below. If that line is really the first data record, one
# record is silently lost -- confirm against the raw export.
water_1_df <- read.csv(
  "data/1/water.csv",
  header = TRUE, skip = 2, check.names = FALSE, stringsAsFactors = FALSE
)
colnames(water_1_df) <- c(
  "start_time", "amount", "custom", "update_time",
  "create_time", "time_offset", "deviceuuid",
  "unit_amount", "comment", "pkg_name", "datauuid"
)
water_1_df <- water_1_df %>%
  select("start_time", "amount") %>%
  mutate(
    time = ymd_hms(start_time),
    # NOTE(review): the locale name "en_US" is platform-dependent; confirm it
    # resolves on every machine this script runs on.
    weekday = wday(time, label = TRUE, abbr = FALSE, locale = "en_US"),
    date = format(time, "%Y.%m.%d"),
    name = "Gentleman1"
  ) %>%
  group_by(date, weekday, name) %>%
  summarise(amount = sum(amount)) %>%
  # summarise() only peels off the last grouping variable; drop the remaining
  # grouping so it does not leak into later verbs applied to this table.
  ungroup() %>%
  # Daily totals below 1000 are replaced with a fixed value of 1250.
  # NOTE(review): presumably this compensates for days with incomplete
  # logging -- confirm the intent and the chosen constants.
  mutate(amount = ifelse(amount < 1000, 1250, amount))
### GENTLEMAN 2
# Water intake for Gentleman 2 was recorded manually, so the file has no
# header row; the column names are attached right after reading.
water_2_df <- setNames(
  read.csv(
    "data/2/water.csv",
    header = FALSE,
    check.names = FALSE,
    stringsAsFactors = FALSE
  ),
  c("date", "weekday", "name", "amount")
)
### GENTLEMAN 3
# Daily water intake for Gentleman 3, built from the export in
# data/3/water.csv: single intake records are summed per calendar day.
# NOTE(review): `skip = 2` together with `header = TRUE` makes read.csv() use
# the third line of the file as the header row, which is then overwritten by
# the names assigned below. If that line is really the first data record, one
# record is silently lost -- confirm against the raw export.
water_3_df <- read.csv(
  "data/3/water.csv",
  header = TRUE, skip = 2, check.names = FALSE, stringsAsFactors = FALSE
)
colnames(water_3_df) <- c(
  "start_time", "amount", "custom", "update_time",
  "create_time", "time_offset", "deviceuuid",
  "unit_amount", "comment", "pkg_name", "datauuid"
)
water_3_df <- water_3_df %>%
  select("start_time", "amount") %>%
  # Only the columns that survive the aggregation are derived here; the
  # per-record day / hour helpers were discarded by summarise() anyway.
  mutate(
    time = ymd_hms(start_time),
    # NOTE(review): the locale name "en_US" is platform-dependent; confirm it
    # resolves on every machine this script runs on.
    weekday = wday(time, label = TRUE, abbr = FALSE, locale = "en_US"),
    date = format(time, "%Y.%m.%d"),
    name = "Gentleman3"
  ) %>%
  group_by(date, weekday, name) %>%
  summarise(amount = sum(amount)) %>%
  # summarise() only peels off the last grouping variable; drop the remaining
  # grouping so it does not leak into later verbs applied to this table.
  ungroup()
################################################################################
################################################################################
################################################################################
# Read one person's step-count export and prepare it for analysis.
#
# Arguments:
#   path         Path to the step-count CSV file.
#   person_name  Label written to the `name` column of the result.
#
# Returns a data frame with run_step, walk_step, start_time, count, speed and
# distance taken from the export, plus the derived columns:
#   time     start_time parsed with ymd_hms()
#   weekday  full weekday name of `time`
#   dayHour  "<day of month>h<hour>" of `time`
#   date     `time` formatted as "YYYY.MM.DD"
#   name     the supplied person label
#
# NOTE(review): `skip = 2` together with `header = TRUE` makes read.csv() use
# the third line of the file as the header row, which is then overwritten by
# the names assigned below. If that line is really the first data record, one
# record per file is silently lost -- confirm against the raw export.
read_step_data <- function(path, person_name) {
  # Column names in file order; some carry the export's namespace prefix.
  export_columns <- c(
    "duration", "version_code", "run_step", "walk_step",
    "com.samsung.health.step_count.start_time",
    "com.samsung.health.step_count.sample_position_type",
    "com.samsung.health.step_count.custom",
    "com.samsung.health.step_count.update_time",
    "com.samsung.health.step_count.create_time",
    "com.samsung.health.step_count.count",
    "com.samsung.health.step_count.speed",
    "com.samsung.health.step_count.distance",
    "com.samsung.health.step_count.calorie",
    "com.samsung.health.step_count.time_offset",
    "com.samsung.health.step_count.deviceuuid",
    "com.samsung.health.step_count.pkg_name",
    "com.samsung.health.step_count.end_time",
    "com.samsung.health.step_count.datauuid"
  )

  steps_df <- read.csv(
    path,
    header = TRUE, skip = 2, check.names = FALSE, stringsAsFactors = FALSE
  )
  # Strip the namespace prefix; the dots are escaped so they match literally.
  colnames(steps_df) <- sub(
    "^com\\.samsung\\.health\\.step_count\\.", "", export_columns
  )

  steps_df %>%
    select("run_step", "walk_step", "start_time", "count", "speed",
           "distance") %>%
    mutate(
      time = ymd_hms(start_time),
      # NOTE(review): the locale name "en_US" is platform-dependent; confirm
      # it resolves on every machine this script runs on.
      weekday = wday(time, label = TRUE, abbr = FALSE, locale = "en_US"),
      dayHour = paste(day(time), hour(time), sep = "h"),
      date = format(time, "%Y.%m.%d"),
      name = person_name
    )
}

### GENTLEMAN 1
step_1_df <- read_step_data("data/1/steps.csv", "Gentleman1")
### GENTLEMAN 2
step_2_df <- read_step_data("data/2/steps.csv", "Gentleman2")
### GENTLEMAN 3
step_3_df <- read_step_data("data/3/steps.csv", "Gentleman3")
################################################################################
################################################################################
################################################################################
# Format the time between `start` and `end` as "HH:MM".
#
# The difference is taken in minutes once and split into whole hours and the
# remaining whole minutes; both parts are zero-padded to two digits.
format_sleep_duration <- function(start, end) {
  total_minutes <- as.numeric(difftime(end, start, units = "mins"))
  sprintf(
    "%02d:%02d",
    floor(total_minutes / 60),
    floor(total_minutes %% 60)
  )
}

# Read one person's sleep export and prepare it for analysis.
#
# Arguments:
#   path         Path to the sleep CSV file.
#   person_name  Label written to the `name` column of the result.
#
# Returns a data frame with the columns:
#   start_time, end_time  parsed with ymd_hms()
#   duration              time between them as an "HH:MM" string
#   day                   day of month of end_time
#   weekday               full weekday name of end_time
#   name                  the supplied person label
#
# NOTE(review): `skip = 2` together with `header = TRUE` makes read.csv() use
# the third line of the file as the header row, which is then overwritten by
# the names assigned below. If that line is really the first data record, one
# record per file is silently lost -- confirm against the raw export.
read_sleep_data <- function(path, person_name) {
  # Column names in file order; some carry the export's namespace prefix.
  export_columns <- c(
    "original_efficiency", "mental_recovery", "factor_01", "factor_02",
    "factor_03", "factor_04", "factor_05", "factor_06", "factor_07",
    "factor_08", "factor_09", "factor_10", "has_sleep_data", "combined_id",
    "sleep_type", "data_version", "physical_recovery",
    "original_wake_up_time", "movement_awakening", "original_bed_time",
    "goal_bed_time", "quality", "extra_data", "goal_wake_up_time",
    "sleep_cycle", "efficiency", "sleep_score", "sleep_duration",
    "com.samsung.health.sleep.create_sh_ver",
    "com.samsung.health.sleep.start_time",
    "com.samsung.health.sleep.custom",
    "com.samsung.health.sleep.modify_sh_ver",
    "com.samsung.health.sleep.update_time",
    "com.samsung.health.sleep.create_time",
    "com.samsung.health.sleep.time_offset",
    "com.samsung.health.sleep.deviceuuid",
    "com.samsung.health.sleep.comment",
    "com.samsung.health.sleep.pkg_name",
    "com.samsung.health.sleep.end_time",
    "com.samsung.health.sleep.datauuid"
  )

  sleep_df <- read.csv(
    path,
    header = TRUE, skip = 2, check.names = FALSE, stringsAsFactors = FALSE
  )
  colnames(sleep_df) <- sub(
    "^com\\.samsung\\.health\\.sleep\\.", "", export_columns
  )

  sleep_df %>%
    select("start_time", "end_time") %>%
    mutate(
      start_time = ymd_hms(start_time),
      end_time = ymd_hms(end_time),
      duration = format_sleep_duration(start_time, end_time),
      # Day and weekday are taken from the end of the sleep period.
      day = day(end_time),
      # NOTE(review): the locale name "en_US" is platform-dependent; confirm
      # it resolves on every machine this script runs on.
      weekday = wday(end_time, label = TRUE, abbr = FALSE, locale = "en_US"),
      name = person_name
    )
}

### GENTLEMAN 1
sleep_1_df <- read_sleep_data("data/1/sleep.csv", "Gentleman1")
### GENTLEMAN 2
# Sleep for Gentleman 2 was recorded manually; the file is used as-is.
sleep_2_df <- read.csv(
  "data/2/sleep.csv",
  header = TRUE, check.names = FALSE, stringsAsFactors = FALSE
)
### GENTLEMAN 3
sleep_3_df <- read_sleep_data("data/3/sleep.csv", "Gentleman3")