-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_analysis.R
78 lines (42 loc) · 2.28 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
library(tidyverse)
# Download the dataset archive (only once) and make sure it is extracted
# into ./data before anything tries to read from it.
data_dir <- "./data"
zip_path <- "./data/project_dataset.zip"
if (!dir.exists(data_dir)) {
  dir.create(data_dir)
}
if (!file.exists(zip_path)) {
  # mode = "wb": the archive is a binary file, so request a binary transfer
  # explicitly instead of relying on platform-specific defaults.
  download.file(
    "https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip",
    zip_path,
    mode = "wb"
  )
}
# Extract whenever the dataset folder is missing, not only right after a
# download; otherwise a run that fetched the archive but never unpacked it
# would leave every later read failing.
if (!dir.exists("./data/UCI HAR Dataset")) {
  unzip(zip_path, exdir = data_dir)
}
# read the data
# Each line of features.txt is kept whole and used as a column name.
features <- read_lines("./data/UCI HAR Dataset/features.txt")
# The measurement files separate values with a varying number of spaces
# (and start each line with padding). read_delim(delim = " ") treats every
# single space as a separator, which produces empty fields and shifts values
# into the wrong columns; read_table() splits on runs of whitespace instead.
# All measurements are read as doubles so no later coercion is required.
test_x_read <- read_table(
  "./data/UCI HAR Dataset/test/X_test.txt",
  col_names = features,
  col_types = cols(.default = col_double())
)
train_x_read <- read_table(
  "./data/UCI HAR Dataset/train/X_train.txt",
  col_names = features,
  col_types = cols(.default = col_double())
)
# change all variables to numeric (if needed)
# Only columns that were parsed as character are converted with as.numeric();
# columns of any other type pass through untouched. across(where(...)) is the
# current replacement for the superseded mutate_if().
test_x <- test_x_read %>%
  mutate(across(where(is.character), as.numeric))
train_x <- train_x_read %>%
  mutate(across(where(is.character), as.numeric))
# attach the subject and activity of every observation as two extra columns;
# both files are read line by line, so the values stay character strings
test_x <- test_x %>%
  mutate(
    subject = read_lines("./data/UCI HAR Dataset/test/subject_test.txt"),
    activity = read_lines("./data/UCI HAR Dataset/test/y_test.txt")
  )
train_x <- train_x %>%
  mutate(
    subject = read_lines("./data/UCI HAR Dataset/train/subject_train.txt"),
    activity = read_lines("./data/UCI HAR Dataset/train/y_train.txt")
  )
# stack the training rows on top of the test rows in a single table
merged <- bind_rows(train_x, test_x)
# keep only the measurements of mean and standard deviation
# (feature names containing "mean", "Mean" or "std"), plus the two id columns
subset_variables <- grep("[mM]ean|std", features, value = TRUE)
merged_subset <- select(merged, all_of(subset_variables), subject, activity)
# add descriptive activity names
# Both label columns are read as character ("cc") so that the activity
# number can be matched against the character activity column.
activity_labels <- read_delim(
  "./data/UCI HAR Dataset/activity_labels.txt",
  delim = " ",
  col_names = c("activity_number", "activity_label"),
  col_types = "cc"
)
# Look up the label for every row, then drop the numeric code it replaces.
merged_subset <- merged_subset %>%
  left_join(activity_labels, by = c("activity" = "activity_number")) %>%
  select(-activity)
# tidy the variable names step by step:
# spaces become underscores, "()" is removed, everything is lower-cased
names(merged_subset) <- names(merged_subset) %>%
  gsub(" ", "_", .) %>%
  gsub("\\(\\)", "", .) %>%
  tolower()
# compute average values for each combination of subject and activity
# across(everything(), mean) averages every non-grouping column and replaces
# the superseded summarise_all(); .groups = "drop" returns an ungrouped table
# instead of one that is still grouped by subject.
variable_means <- merged_subset %>%
  group_by(subject, activity_label) %>%
  summarise(across(everything(), mean), .groups = "drop")
# write the averaged table to a text file in the working directory,
# without row names
output_path <- "./avg_data.txt"
write.table(variable_means, output_path, row.names = FALSE)