# File: reportModelPredictionsPerformanceMAsEff.R
# Title : Models predictions accuracy performance / stability CSV report.
# Clear every object from the current environment before the run starts.
# NOTE(review): this also removes anything the caller defined before running
# this script — confirm that is intended.
rm(list = ls())
# caret supplies confusionMatrix(); psych supplies describe().
library(caret)
library(psych)
# NOTE(review): analysis.r is presumably where mainOpenSecurity(),
# getMySymbolsData() and the data.table / pipe functions used below come
# from — verify, since this file does not load them itself.
source("analysis.r")
# Summarise how well one predictions file matched the realised outcome.
#
# The predictions CSV is read from the global `basePath`, joined by Date with
# the security data returned by mainOpenSecurity(), and scored per calendar
# month by comparing the predicted class (EffPred) with the actual class
# (Eff2). Monthly accuracy and prevalence are then averaged over the most
# recent 6 / 3 / 2 / 1 qualifying months.
#
# Args:
#   predictFilename: name of a CSV file inside `basePath`. Its first two
#     dash-separated tokens are used as the security symbol.
#
# Returns:
#   A one-row data.table with the file name, its modification time, the
#   number of days since then, and the mean / SD columns Acc*, AccSD*,
#   Prev*, PrevSD*.
testPredictAccuracy <- function(predictFilename) {
  cat("Processing: ", predictFilename, "\n")

  # Symbol = first two dash-separated tokens of the file name.
  filenameParts <- unlist(strsplit(predictFilename, "-"))
  symbolTest <- paste(filenameParts[1], filenameParts[2], sep = "-")

  # First day of the month that lies 210 days in the past.
  startDate <- as.Date(format(Sys.Date() - 210, "%Y-%m-01"))
  # NOTE(review): the meaning of the positional arguments 2 and 4 is defined
  # by mainOpenSecurity(), which is not part of this file — confirm there.
  securityDataTest <- mainOpenSecurity(
    symbolTest, 2, 4,
    "%Y-%m-%d", startDate
  )

  predictPath <- paste0(basePath, predictFilename)
  predictFileInfo <- file.info(predictPath)
  dailyIndicator <- fread(predictPath)
  dailyIndicator[, Date := as.Date(Date)]
  dailyIndicator[, YearMonth := format(Date, "%Y-%m")]
  # Inner join: only dates present in both data sets are scored.
  dailyIndicator <- merge(
    securityDataTest[, c("Date", "Mid", "diffPercent", "Eff2")],
    dailyIndicator,
    by = "Date"
  )

  # Accuracy and prevalence for one month of joined rows.
  calculateAccuracy <- function(monthlyData) {
    categoryLevels <- c("buy", "sell")
    # caret's table method expects predictions in the rows and the reference
    # (actual) classes in the columns; Prevalence is derived from the
    # columns, so this orientation makes it the share of actual "buy" days.
    confusionTable <- table(
      predictedclass = factor(monthlyData$EffPred, levels = categoryLevels),
      actualclass = factor(monthlyData$Eff2, levels = categoryLevels)
    )
    confusionData <- caret::confusionMatrix(confusionTable)
    list(
      N = nrow(monthlyData),
      Accuracy = confusionData$overall[["Accuracy"]],
      Prevalence = confusionData$byClass[["Prevalence"]]
    )
  }

  accuracyTest <- dailyIndicator[, calculateAccuracy(.SD), by = "YearMonth"]
  # Drop months that do not have at least 7 observations yet.
  accuracyTest <- accuracyTest[N >= 7]
  # "%Y-%m" strings sort chronologically, so tail() below always selects the
  # most recent months.
  accuracyTest <- accuracyTest[order(YearMonth)]

  # Descriptive statistics of Accuracy / Prevalence over the last `months`
  # qualifying months. Every window is taken from the recent end so that the
  # 6-month figures are comparable with the 3/2/1-month ones.
  statColumns <- c("Accuracy", "Prevalence")
  describeRecent <- function(months) {
    recentMonths <- tail(accuracyTest[, statColumns, with = FALSE], months)
    round(describe(recentMonths), 3)
  }
  descriptives6m <- describeRecent(6)
  descriptives3m <- describeRecent(3)
  descriptives2m <- describeRecent(2)
  descriptives1m <- describeRecent(1)

  # The file's last-modification time is used as the model's creation date.
  createDate <- predictFileInfo$mtime
  prodDays <- as.numeric(
    difftime(Sys.Date(), as.Date(createDate), units = "days")
  )

  # Row 1 of each describe() result is Accuracy, row 2 is Prevalence.
  data.table(
    PredictFile = predictFilename,
    Created = createDate,
    ProdDays = prodDays,
    Acc6m = descriptives6m$mean[1],
    Acc3m = descriptives3m$mean[1],
    Acc2m = descriptives2m$mean[1],
    Acc1m = descriptives1m$mean[1],
    AccSD6m = descriptives6m$sd[1],
    AccSD3m = descriptives3m$sd[1],
    Prev6m = descriptives6m$mean[2],
    Prev3m = descriptives3m$mean[2],
    Prev2m = descriptives2m$mean[2],
    Prev1m = descriptives1m$mean[2],
    PrevSD6m = descriptives6m$sd[2],
    PrevSD3m = descriptives3m$sd[2]
  )
}
# ---- Build and export the report ----
# NOTE(review): getMySymbolsData() is defined outside this file; presumably
# it prepares the symbol data that mainOpenSecurity() reads — confirm.
getMySymbolsData("working")

basePath <- "./predictions/"
# Reports are written into the same directory the prediction files are read
# from, so they are identified by this prefix and excluded from the input.
reportPrefix <- "models-predict-performance-maseff-"

# `pattern` is a regular expression, not a glob: match the extension at the
# end of the name.
predictFiles <- list.files(basePath, pattern = "\\.csv$")
# Without this filter a report from an earlier run would be processed as if
# it were a prediction file.
predictFiles <- predictFiles[!startsWith(predictFiles, reportPrefix)]

# rbindlist() already returns a data.table.
testResults <- rbindlist(lapply(predictFiles, testPredictAccuracy))

reportDate <- format(Sys.Date(), "%Y-%m-%d")
modelsPredictSummaryFilename <- paste0(
  basePath, reportPrefix, reportDate, ".csv"
)
fwrite(testResults, modelsPredictSummaryFilename)
cat("Models summary exported to:", modelsPredictSummaryFilename, "\n")