-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFirstMiniProj.r
48 lines (35 loc) · 1.49 KB
/
FirstMiniProj.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
library(plyr)
#' Mean of one pollutant across a set of monitor files
#'
#' Reads the selected CSV files in `directory` and averages the `pollutant`
#' column over all of them, ignoring missing values.
#'
#' @param directory Path of the folder that holds the monitor CSV files.
#' @param pollutant Name of the column to average (for example "sulfate").
#' @param id Positions of the files to use within the listing returned by
#'   `list.files(directory)`.
#' @return A single numeric value: the mean of every non-missing reading.
pollutantmean <- function(directory, pollutant, id = 1:332) {
  csv_paths <- list.files(path = directory, full.names = TRUE)[id]
  # Indexing past the end of the listing yields NA; report that clearly
  # instead of letting read.csv() fail on a path that ends in "NA".
  if (anyNA(csv_paths)) {
    stop(
      "`id` selects files that do not exist in '", directory, "'",
      call. = FALSE
    )
  }

  # Only the requested column is kept from each file, so nothing has to be
  # row-bound; a file that lacks the column simply contributes no values.
  readings <- unlist(
    lapply(csv_paths, function(csv_path) read.csv(csv_path)[[pollutant]]),
    use.names = FALSE
  )
  if (is.null(readings)) {
    stop(
      "no values found for pollutant '", pollutant, "' in the selected files",
      call. = FALSE
    )
  }

  mean(readings, na.rm = TRUE)
}
#' Count the complete observations in each selected monitor file
#'
#' For every requested file, reads the CSV and counts the rows that have no
#' missing value in any column.
#'
#' @param directory Path of the folder that holds the monitor CSV files.
#' @param id Positions of the files to inspect within the listing returned by
#'   `list.files(directory)`. Order and repeats are preserved in the result.
#' @return A data frame with one row per element of `id`: column `id` holds
#'   the requested position and `nobs` the number of complete rows.
complete <- function(directory, id = 1:332) {
  csv_paths <- list.files(path = directory, full.names = TRUE)[id]
  # A zero, negative or out-of-range position would desynchronise the paths
  # from `id` (or produce an NA path), so reject it before reading anything.
  if (length(csv_paths) != length(id) || anyNA(csv_paths)) {
    stop(
      "`id` must contain positions of files that exist in '", directory, "'",
      call. = FALSE
    )
  }

  # Counts are produced in the same order as `id`, so each count stays paired
  # with the monitor it was computed from even when `id` is not ascending.
  nobs <- vapply(
    csv_paths,
    function(csv_path) nrow(na.omit(read.csv(csv_path))),
    integer(1),
    USE.NAMES = FALSE
  )

  data.frame(id = id, nobs = nobs)
}
#' Sulfate/nitrate correlation for sufficiently complete monitor files
#'
#' Reads the CSV files in `directory`, drops rows with any missing value, and
#' for each file whose number of complete rows exceeds `threshold` computes
#' the correlation between its "sulfate" and "nitrate" columns.
#'
#' @param directory Path of the folder that holds the monitor CSV files.
#' @param threshold A file is used only when it has more than this many
#'   complete rows.
#' @return A numeric vector of correlations, one per qualifying file, in
#'   listing order; `numeric(0)` when no file qualifies.
corr <- function(directory, threshold = 0) {
  # At most the first 332 listed files are considered, as before. head()
  # does not pad a shorter listing with NA, so smaller directories work too.
  max_files <- 332L
  csv_paths <- head(
    list.files(path = directory, full.names = TRUE),
    max_files
  )

  per_file <- lapply(csv_paths, function(csv_path) {
    complete_rows <- na.omit(read.csv(csv_path))
    if (nrow(complete_rows) > threshold) {
      cor(complete_rows[["sulfate"]], complete_rows[["nitrate"]])
    } else {
      # NULL entries vanish in unlist(), so skipped files leave no trace
      # while a genuine NA correlation is still reported.
      NULL
    }
  })

  # as.numeric() turns the NULL that unlist() gives for "nothing qualified"
  # into an empty numeric vector.
  as.numeric(unlist(per_file, use.names = FALSE))
}
# Load the hospital outcome table with every column read as text, so no
# value is reinterpreted on import.
outcome <- read.csv(
  "ProgHospData/outcome-of-care-measures.csv",
  colClasses = "character"
)

# Column 11 is converted to numbers for plotting; entries that are not
# numeric text become NA (with a coercion warning).
# NOTE(review): judging by the plot title this column holds the 30-day
# heart-attack mortality rate -- confirm against the data dictionary.
outcome[[11]] <- as.numeric(outcome[[11]])

# Histogram of the converted column.
hist(
  outcome[[11]],
  main = "Hospital 30-Day Death (Mortality) Rates from Heart Attack",
  xlab = "Deaths"
)