-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_analysis.R
154 lines (118 loc) · 7.06 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
## tidy() builds two text files from the "UCI HAR Dataset" archive:
##   tidydata.txt  - one row per observation: activity name, subject number and
##                   every mean()/std() measurement, with tidied column names
##   tidydata2.txt - the average of each of those measurements for every
##                   activity/subject pair (column names end in "avg")
##
## Takes no arguments. Works relative to the current working directory: the
## archive is downloaded and unzipped into ./data when
## "./data/UCI HAR Dataset" is not already present.
##
## If tidydata.txt already exists the user is prompted:
##   "y" rebuilds it, "n" reuses the existing file, anything else stops with
##   an error.
##
## Returns the value of the final print() call, i.e. the status string
## "data written to tidydata2.txt" (invisibly).
tidy <- function() {
  ## reshape2 supplies melt() and dcast() used for the averaging step
  library(reshape2)

  ## Rebuild by default; only an explicit "n" answer keeps the existing file
  overwrite <- TRUE
  if (file.exists("tidydata.txt")) {
    answer <- readline("File exists, overwrite y/n? ")
    if (answer == "n") {
      overwrite <- FALSE
      ## header = TRUE is stated explicitly instead of relying on read.table's
      ## auto-detection from the field count of the first row
      tidydata <- read.table("tidydata.txt", header = TRUE)
      print("loaded current tidydata.txt")
    } else if (answer != "y") {
      stop("invalid input function stopped")
    }
  }

  if (overwrite) {
    if (!dir.exists("data")) {
      dir.create("data")
    }

    ## The dataset directory and its files are addressed by name. Picking them
    ## by position in list.files() output depends on collation order and on
    ## nothing else being stored in ./data.
    maindir <- file.path("data", "UCI HAR Dataset")
    if (!dir.exists(maindir)) {
      temp <- tempfile(fileext = ".zip")
      ## Remove the temporary archive even when the download or unzip fails
      on.exit(unlink(temp), add = TRUE)
      fileURL <- "https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip"
      ## The archive is binary, so request a binary transfer explicitly
      download.file(fileURL, temp, mode = "wb")
      unzip(temp, exdir = "data")
    }

    ## Lookup table: activity number -> activity name
    actlab <- read.table(file.path(maindir, "activity_labels.txt"))
    colnames(actlab) <- c("activitynumber", "activityname")

    ## Second column of features.txt holds one name per measurement column
    feat <- read.table(file.path(maindir, "features.txt"))
    feat_names <- as.character(feat[[2]])

    ## Keep every mean()/std() measurement. Matching the "-mean()" / "-std()"
    ## token anywhere in the name also keeps features without an axis suffix
    ## (e.g. "tBodyAccMag-mean()"), which a fixed-offset substring test misses.
    ## "meanFreq()" does not match because "(" must directly follow "mean".
    keep <- grepl("-(mean|std)\\(\\)", feat_names)

    ## Tidy column names: drop "-", "(" and ")", lower-case, then expand the
    ## leading "t"/"f" abbreviation to "time"/"frequency"
    tidy_names <- tolower(gsub("[-()]", "", feat_names[keep]))
    tidy_names <- sub("^t", "time", tidy_names)
    tidy_names <- sub("^f", "frequency", tidy_names)

    ## Reads one data set ("test" or "train") and returns a data frame with
    ## activitynumber, subjectnumber and the selected measurement columns
    read_set <- function(set) {
      setdir <- file.path(maindir, set)
      sub <- read.table(file.path(setdir, paste0("subject_", set, ".txt")))
      x <- read.table(file.path(setdir, paste0("X_", set, ".txt")))
      y <- read.table(file.path(setdir, paste0("y_", set, ".txt")))
      ## A logical index of the wrong length would be recycled silently
      if (ncol(x) != length(feat_names)) {
        stop("X_", set, ".txt has ", ncol(x), " columns but features.txt lists ",
             length(feat_names), " features")
      }
      measures <- x[, keep, drop = FALSE]
      colnames(measures) <- tidy_names
      data.frame(activitynumber = y[[1]],
                 subjectnumber = sub[[1]],
                 measures,
                 check.names = FALSE)
    }

    ## Test rows first, then train rows, in one data frame
    tidydata <- rbind(read_set("test"), read_set("train"))

    ## Attach the activity names, then drop the numeric activity code
    tidydata <- merge(actlab, tidydata, by = "activitynumber")
    tidydata$activityname <- factor(tidydata$activityname)
    tidydata <- tidydata[, names(tidydata) != "activitynumber", drop = FALSE]

    write.table(tidydata, "tidydata.txt")
    print("data written to tidydata.txt")
  }

  ## Long format: one row per (activity, subject, variable, value)
  meltdata <- melt(tidydata, id.vars = c("activityname", "subjectnumber"))
  ## Back to wide format, averaging each variable per activity and subject
  tidydata2 <- dcast(meltdata, activityname + subjectnumber ~ variable,
                     fun.aggregate = mean)

  ## Suffix the averaged columns with "avg"; the two id columns keep their
  ## names. seq_len()[-(1:2)] is empty when there are no measure columns,
  ## where 3:ncol() would count backwards.
  avg_cols <- seq_len(ncol(tidydata2))[-(1:2)]
  colnames(tidydata2)[avg_cols] <- paste0(colnames(tidydata2)[avg_cols], "avg")

  write.table(tidydata2, "tidydata2.txt")
  print("data written to tidydata2.txt")
}