-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathNaiveBayes.R
74 lines (60 loc) · 1.91 KB
/
NaiveBayes.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#Simple naive bayes method-------------------------------------------------
###############################################################################
# Libraries
library(naivebayes)
library(dplyr)
library(ggplot2)
library(psych)
# Data ----
# Interactively choose the CSV file to analyse; header = TRUE means the first
# row of the file supplies the column names.
data <- read.csv(file.choose(), header = TRUE)
#data <- read.csv("https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data", header=FALSE, sep=",", dec=".", na.strings=c("?"))
str(data)

# Fail early, with a readable message, if the chosen file does not contain the
# columns this script refers to (admit, gre, gpa, rank).
required_cols <- c("admit", "gre", "gpa", "rank")
missing_cols <- setdiff(required_cols, names(data))
if (length(missing_cols) > 0) {
  stop(
    "Input file is missing required column(s): ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Contingency table of admit by rank
xtabs(~ admit + rank, data = data)

# Treat rank and the response admit as categorical variables
data$rank <- as.factor(data$rank)
data$admit <- as.factor(data$admit)
# Visualization ----
# Pairwise panel plot of every column except the first one
pairs.panels(data[-1])

# gpa by admit level, drawn as box plots
ggplot(data, aes(x = admit, y = gpa, fill = admit)) +
  geom_boxplot() +
  ggtitle("Box Plot")

# gpa by admit level, drawn as semi-transparent density curves
ggplot(data, aes(x = gpa, fill = admit)) +
  geom_density(alpha = 0.8, color = "black") +
  ggtitle("Density Plot")
# Data Partition way---1
#set.seed(1234)
#ind <- sample(2, nrow(data), replace = TRUE, prob = c(0.8, 0.2))
#train <- data[ind == 1,]
#test <- data[ind == 2,]

library(caret) # data partition way-2
set.seed(123)
# createDataPartition() returns a list; its first element holds the row
# indices selected for training (p = 0.8 requests 80% of the rows).
# The index vector is deliberately NOT called `sample`, to avoid masking
# base::sample().
train_rows <- createDataPartition(data$admit, p = 0.8)[[1]]
train <- data[train_rows, ]
test <- data[-train_rows, ]

# data partition way-3
# NOTE(review): this split overwrites the train/test created by way-2 above,
# so way-3 is the split actually used by the rest of the script. Way-2 is
# still executed so the random-number stream (seeded above) and therefore the
# resulting split stay the same as before.
library(caTools)
in_train <- sample.split(data$admit, SplitRatio = 0.80)
train <- subset(data, in_train)
test <- subset(data, !in_train)

print("Test train dimension")
dim(test)
dim(train)
# Naive Bayes Model ----
# A stray call left over from a mushroom-data example used to sit here:
#   NaiveBayesModel <- naive_bayes(V1 ~ ., data = Mushrooms[training_index, ])
# Neither `Mushrooms` nor `training_index` is defined anywhere in this script,
# so the call failed with an "object not found" error (aborting the script
# when it is sourced). It is kept only as a comment for reference.

# Fit admit against all other columns of the training set, with kernel
# density estimation switched off (usekernel = FALSE).
model <- naive_bayes(admit ~ ., data = train, usekernel = FALSE)
model

# Mean and standard deviation of gre among training rows where admit is "0"
train %>%
  filter(admit == "0") %>%
  summarise(mean(gre), sd(gre))

plot(model)
# Predict ----
# Predictions with type = "prob" for the test rows, shown beside those rows
p <- predict(model, test, type = "prob")
head(cbind(p, test))

# Confusion Matrix - train data
p1 <- predict(model, train)
tab1 <- table(p1, train$admit)
tab1
# Misclassification rate: 1 - (sum of the diagonal / total count)
1 - sum(diag(tab1)) / sum(tab1)

# Confusion Matrix - test data
p2 <- predict(model, test)
tab2 <- table(p2, test$admit)
tab2
# Misclassification rate on the held-out rows
1 - sum(diag(tab2)) / sum(tab2)