-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path01_DataCleaning_03.2_rastClust.R
125 lines (94 loc) · 3.45 KB
/
01_DataCleaning_03.2_rastClust.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
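# Description ---------------
# This script clusters the parking meters by geographical location:
# - every meter is assigned to a cell of a rectangular grid
#   defined on longitude and latitude
# - the free parking spaces are then aggregated per grid cell, date and hour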
# Setup ----------------------------------------------
# Load required packages
library(tidyverse)
library(dplyr)
library(data.table)
library(tidyr)
library(ggplot2)
library(ggmap)
# Clear workspace
# Start from an empty workspace and with no open graphics devices.
rm(list = ls())
graphics.off()

# Register Google key
# The API key is a credential and must not live in the source file. It is
# read from the GGMAP_GOOGLE_API_KEY environment variable (for example set in
# ~/.Renviron).
# NOTE(review): the key that used to be hard-coded here has been exposed and
# should be revoked and replaced.
if (!nzchar(Sys.getenv("GGMAP_GOOGLE_API_KEY"))) {
  stop("Set the GGMAP_GOOGLE_API_KEY environment variable to a Google Maps ",
       "API key before running this script.", call. = FALSE)
}
register_google(key = Sys.getenv("GGMAP_GOOGLE_API_KEY"))

# Load the previously saved merged version of our parking data
# Two candidate folders are known (presumably one per collaborator machine).
# Every candidate that exists is loaded, in the listed order; a missing folder
# is skipped instead of aborting the script.
for (data_file in c(
  "../02_Business_Analytics_Data/df_set_02_merged.RData",
  "../Schramm, Cornelius - 02_Business_Analytics_Data/df_set_02_merged.RData"
)) {
  if (file.exists(data_file)) {
    load(data_file)
  }
}
rm(data_file)

# The rest of the script works on DF_merged, so fail early and clearly if
# none of the candidate files provided it.
if (!exists("DF_merged")) {
  stop("df_set_02_merged.RData was not found in any known data folder, ",
       "or it does not contain DF_merged.", call. = FALSE)
}
# Unique meter locations -----
# Work on the first three columns of the merged data only; they are addressed
# below by the names SourceElementKey, lon and lat.
df <- DF_merged[, 1:3]

# Keep the first row of every distinct (SourceElementKey, lon, lat)
# combination and store the result as a plain data.frame.
locations <- df[!duplicated(df[, c("SourceElementKey", "lon", "lat")]), ]
locations <- data.frame(locations[, 1:3])

# Renumber the rows 1..n after the filtering
row.names(locations) <- NULL
# Grid definition -----
# Latitude breaks are listed in descending order, longitude breaks in
# ascending order.
ysteps <- seq(from = 47.64, to = 47.59, by = -0.0015)
xsteps <- seq(from = -122.36, to = -122.30, by = 0.0025)

# Map with the grid to create
# Meter locations are drawn as points on a map of Seattle, with one red line
# per latitude / longitude break.
map <- get_map("Seattle", zoom = 13)
ggmap(map) +
  geom_point(
    aes(x = lon, y = lat),
    data = locations,
    alpha = 0.8
  ) +
  ylim(47.59, 47.64) +
  xlim(-122.375, -122.3) +
  geom_hline(yintercept = ysteps, color = "red") +
  geom_vline(xintercept = xsteps, color = "red")
# Assign every meter location to a grid cell -----
# Grid column: cell i is the open interval (xsteps[i], xsteps[i + 1]).
# - Both bounds come from xsteps, the longitude breaks.
# - There are only length(xsteps) - 1 cells. Indexing one break further
#   yields NA, and an NA in the row index makes the assignment fail.
# - which() drops NA comparisons (e.g. a missing coordinate).
# Locations outside the grid, or exactly on a break, keep NA.
locations$xcluster <- NA_integer_
for (i in seq_len(length(xsteps) - 1L)) {
  locations$xcluster[
    which(locations$lon > xsteps[i] & locations$lon < xsteps[i + 1L])
  ] <- i
}

# Grid row: ysteps is descending, so cell i is (ysteps[i + 1], ysteps[i]).
locations$ycluster <- NA_integer_
for (i in seq_len(length(ysteps) - 1L)) {
  locations$ycluster[
    which(locations$lat < ysteps[i] & locations$lat > ysteps[i + 1L])
  ] <- i
}

# Cell label "<column>-<row>"; an unassigned index shows up as "NA".
locations$cluster <- paste0(locations$xcluster, "-", locations$ycluster)

# Number of meter locations per cell
table(locations$cluster)
# Create numeric cluster names
# Lookup table with one row per distinct cluster label, numbered 1..k in the
# order in which the labels first appear in `locations`.
tempDF <- data.frame(cluster = unique(locations[["cluster"]]))
tempDF$ClustNum <- as.numeric(seq_len(nrow(tempDF)))

# Attach the numeric id to every location via its label
locations <- left_join(locations, tempDF, by = "cluster")
# Merge cluster into main dataframe
# Keep the first three columns (key and coordinates) plus columns 6 and 7,
# the cluster label and its numeric id.
locations <- locations[, c(1:3, 6, 7)]
DF_merged <- merge(locations, DF_merged, by = "SourceElementKey")

# Renaming
# merge() returns the key column first, then the remaining columns of
# `locations`. Columns 2:3 of the result are therefore the coordinates, not
# the cluster columns, so the rename is done by name: the text label becomes
# "ClusterLabel" and the numeric id becomes "cluster".
# Column count and order are left unchanged.
names(DF_merged)[names(DF_merged) == "cluster"] <- "ClusterLabel"
names(DF_merged)[names(DF_merged) == "ClustNum"] <- "cluster"
# Aggregating by clusters ----
# Making mergeCol
# One key per (date, hour, cluster) combination, used to join the aggregated
# values back to the information columns.
DF_merged <- transform(
  DF_merged,
  MergeCol = paste(date, hour, cluster, sep = "_")
)

# Aggregate by clusters
# Sum of freeParkingSpaces per cluster, date and hour. aggregate() stores the
# summed values in a column named `x`.
tempDF2 <- aggregate(
  DF_merged[["freeParkingSpaces"]],
  by = list(
    cluster = DF_merged[["cluster"]],
    date = DF_merged[["date"]],
    hour = DF_merged[["hour"]]
  ),
  FUN = sum
)

# Making mergeCol
# Same key on the aggregated table
tempDF2 <- transform(
  tempDF2,
  MergeCol = paste(date, hour, cluster, sep = "_")
)

# Saving the information columns to temporary dataframe
# The first row of every key supplies all remaining columns.
tempDF <- data.frame(DF_merged[!duplicated(DF_merged[["MergeCol"]]), ])

# Merging back together
DF_Rastclust <- left_join(tempDF2, tempDF, by = "MergeCol")
# Sorting
# Drop the columns that are no longer needed, addressed by their position in
# the joined table.
# NOTE(review): the positions depend on the column layout of the loaded
# data; confirm them whenever that layout changes.
DF_Rastclust <- DF_Rastclust[, -c(6:10, 13:16, 31)]

# Renaming
# The first four columns come from the aggregation step.
names(DF_Rastclust)[1:4] <- c("cluster", "date", "hour", "freeParkingSpaces")
# Plot cluster
# Meter locations on a map of Seattle, coloured by their `cluster` column.
map <- get_map("Seattle", zoom = 13)
ggmap(map) +
  geom_point(
    aes(x = lon, y = lat, color = cluster),
    data = locations,
    alpha = 0.8
  ) +
  ylim(47.59, 47.64) +
  xlim(-122.375, -122.3)
# Save -----
# Remove the intermediate objects created above, so they are not part of a
# saved workspace image.
rm(list = c(
  "df", "DF_merged", "xsteps", "ysteps", "locations",
  "tempDF", "tempDF2", "i", "map"
))
# Saving is currently disabled; uncomment the line matching the local folder.
# save.image(file = "../02_Business_Analytics_Data/df_set_03_rasterCluster.RData")
# save.image(file = "../Schramm, Cornelius - 02_Business_Analytics_Data/df_set_03_rasterCluster.RData")