-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path01_DataCleaning_03.3_simClust.R
60 lines (41 loc) · 1.32 KB
/
01_DataCleaning_03.3_simClust.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# Description ---------------
# This script clusters the time series based on their similarity
# (dynamic time warping distance followed by hierarchical clustering).
# Setup ----------------------------------------------
# Load required packages
library(tidyverse)
library(data.table)
library(quantmod)
library(dtwclust)
library(ggplot2)
library(dtw)
# Reset graphics and load data ----------------------
# Close any graphics devices left open by earlier runs. The global workspace
# is deliberately not wiped (no rm(list = ls())): sourcing this script must
# not destroy unrelated objects in the caller's session.
graphics.off()
# Load Data
# The same .RData file name is looked up under two folder names. A location
# that does not exist is skipped instead of aborting the script with a load()
# error. The sections below use `finalDF`, which this file is expected to
# provide.
data_paths <- c(
  "../02_Business_Analytics_Data/df_set_04_Sort4Clust.RData",
  "../Schramm, Cornelius - 02_Business_Analytics_Data/df_set_04_Sort4Clust.RData"
)
found_paths <- data_paths[file.exists(data_paths)]
if (length(found_paths) == 0L) {
  stop(
    "df_set_04_Sort4Clust.RData not found in any of: ",
    paste(data_paths, collapse = "; "),
    call. = FALSE
  )
}
# Load every copy that exists, in the original order, so a later copy still
# overrides an earlier one.
for (data_path in found_paths) {
  load(data_path)
}
# Standardization -----
# Centre and scale every column except the first, then put the untouched
# first column (kept as `datetime`; presumably the timestamps - confirm)
# back in front of the scaled values.
ScaledDF <- scale(finalDF[, -1])
datetime <- finalDF[, 1]
FinalScaled <- cbind(datetime, ScaledDF)
# Compartmentalizing by start and end dates-----------
# This part still needs some work
# Columns that contain missing values within the first rows (head()) are
# treated as one compartment, all remaining columns as the other.
test <- head(FinalScaled)
# colnames() is used throughout because it works for both data frames and
# matrices, whereas names() is NULL for a matrix.
col_names <- colnames(FinalScaled)
NAind <- col_names[colSums(is.na(test)) > 0]
has_leading_na <- col_names %in% NAind
# Compartment 1: every column without missing values in the first rows.
DF_compart1 <- subset(FinalScaled, select = !has_leading_na)
# Compartment 2: the first column plus the columns flagged above. The first
# column is forced in through the logical mask itself; combining the index 1
# with a logical vector via c() would coerce the mask to 0/1 column indices
# and select only column 1 over and over.
keep_compart2 <- has_leading_na | seq_along(col_names) == 1L
DF_compart2 <- subset(FinalScaled, select = keep_compart2)
# Converting to time series -----------------
tsDF <- as.ts(FinalScaled)
# Small working subset: the first 720 rows of column 1 and of columns
# 426-435, then trimmed to its first 8 columns (column 1 plus 426-432).
# NOTE(review): the row count and column positions are hard-coded for the
# loaded data set - confirm they still match its layout.
SpielDF <- tsDF[1:720, c(1, 426:435)]
SpielDF <- SpielDF[, 1:8]
# Plot every series of the subset except column 1. Dropping column 1 by
# negative index keeps the plot valid whatever the number of retained
# columns; a fixed range such as 2:11 is out of bounds after the trim to 8.
plot.ts(
  SpielDF[, -1],
  type = "b",
  col = "blue"
)
# Calculating distance
# dist() compares the ROWS of its input. SpielDF holds one series per column
# and one time step per row, so column 1 is dropped and the remaining columns
# are transposed: each row handed to dist() is then one complete series and
# the result is a series-by-series DTW distance.
# NOTE(review): method = "DTW" relies on the dist() of the proxy package
# (attached together with dtw) and on the DTW measure registered there -
# confirm that this dist() is the one found first in your session.
# NOTE(review): series containing NA values may make the DTW computation
# fail - confirm the selected columns are complete.
distance <- dist(t(SpielDF[, -1]), method = "DTW")
# Hierarchical clustering (average linkage) of the series
hc <- hclust(distance, method = "average")
plot(hc)