-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added cervical cancer example and improved minor things
- Loading branch information
Goerke
authored and
Goerke
committed
Aug 2, 2019
1 parent
44d99a6
commit 75d355b
Showing
7 changed files
with
132 additions
and
81 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,51 +1,62 @@ | ||
library(mlr) | ||
|
||
load(paste0(getwd(), "/inst/examples/ExampleCancer/cervical.RData")) | ||
|
||
# our goal is to predict whether an individual has cancer | ||
task = makeClassifTask(data = cervical, target = "Biopsy", id = "Biopsy") | ||
|
||
# setting up a learner | ||
lrn.rpart = makeLearner("classif.rpart") | ||
|
||
# train the learner on the whole task (no separate training subset is passed) | ||
model = mlr::train(learner = lrn.rpart, task = task) | ||
|
||
|
||
# Setting up a perturbation function. As we want to explain a tabular instance (an observation in our cervical dataset), we stick to a featureless tabular perturbation function | ||
perturbator = makePerturbFun("tabular.featureless") | ||
|
||
# discretizing the dataset | ||
## TODO: add better discretizations | ||
discCervical = cervical | ||
discCervical[,"Smokes"] = NA | ||
discCervical[,"Smokes..years."] = NA | ||
discCervical[,"Hormonal.Contraceptives"] = NA | ||
discCervical[,"Hormonal.Contraceptives..years."] = NA | ||
discCervical[,"IUD"] = NA | ||
discCervical[,"IUD..years."] = NA | ||
discCervical[,"STDs"] = NA | ||
discCervical[,"STDs..number."] = NA | ||
discCervical[,"STDs..Number.of.diagnosis"] = NA | ||
discCervical[,"STDs..Time.since.first.diagnosis"] = NA | ||
discCervical[,"STDs..Time.since.last.diagnosis"] = NA | ||
discCervical = arules::discretizeDF(discCervical) | ||
discCervical[,"Smokes"] = arules::discretize(cervical[,"Smokes"], breaks =1 ) | ||
discCervical[,"Smokes..years."] = arules::discretize(cervical[,"Smokes..years."], breaks =1) | ||
discCervical[,"Hormonal.Contraceptives"] = arules::discretize(cervical[,"Hormonal.Contraceptives"], breaks =1) | ||
discCervical[,"Hormonal.Contraceptives..years."] = arules::discretize(cervical[,"Hormonal.Contraceptives..years."], breaks =2) | ||
discCervical[,"IUD"]= arules::discretize(cervical[,"IUD"], breaks =1 ) | ||
discCervical[,"IUD..years."]= arules::discretize(cervical[,"IUD..years."], breaks =1) | ||
discCervical[,"STDs"]= arules::discretize(cervical[,"STDs"], breaks =1) | ||
discCervical[,"STDs..number."]= arules::discretize(cervical[,"STDs..number."], breaks =1) | ||
discCervical[,"STDs..Number.of.diagnosis"]= arules::discretize(cervical[,"STDs..Number.of.diagnosis"], breaks =1) | ||
discCervical[,"STDs..Time.since.first.diagnosis"]= arules::discretize(cervical[,"STDs..Time.since.first.diagnosis"], breaks =1) | ||
discCervical[,"STDs..Time.since.last.diagnosis"]= arules::discretize(cervical[,"STDs..Time.since.last.diagnosis"], breaks =1) | ||
load("inst/examples/ExampleCancer/cervical.RData") | ||
|
||
cervical_label_cancer = cervical[cervical$Biopsy == "Cancer",] | ||
cervical_label_healthy = cervical[cervical $Biopsy == "Healthy",] | ||
cervical_label_healthy = cervical_label_healthy[sample(1:nrow(cervical_label_healthy), nrow(cervical_label_cancer)), ] | ||
cervical = rbind(cervical_label_cancer, cervical_label_healthy) | ||
|
||
cervical.task = makeClassifTask(data = cervical, target = "Biopsy") | ||
model = mlr::train(mlr::makeLearner(cl = 'classif.rpart', id = 'cervical-rf', predict.type = 'prob'), cervical.task) | ||
|
||
# Visualize | ||
rpart.plot::rpart.plot(getLearnerModel(model)) | ||
|
||
bins <- list() | ||
for (i in 1:(ncol(cervical)-1)) { | ||
bins[[i]] <- list() | ||
bins[[i]]$doDiscretize <- T | ||
#bins[[i]]$numeric <- T | ||
#bins[[i]]$right = | ||
} | ||
|
||
# Age | ||
bins[[1]]$cuts <- c(15, 25, 35, 50, 60) | ||
# Number.of.sexual.partners | ||
bins[[2]]$cuts <- arules::discretize(cervical[, 2], breaks = 2, onlycuts = T) | ||
# First.sexual.intercourse | ||
bins[[3]]$cuts <- arules::discretize(cervical[, 3], breaks = 4, onlycuts = T) | ||
# Num.of.pregnancies | ||
bins[[4]]$cuts <- c(0, 1, 2, 4) | ||
# Smokes | ||
bins[[5]]$doDiscretize <- F | ||
# Smokes..years. | ||
bins[[6]]$cuts <- c(0, 2, 5, 10) | ||
# Hormonal.Contraceptives | ||
bins[[7]]$doDiscretize <- F | ||
# Hormonal.Contraceptives..years. | ||
bins[[8]]$cuts <- arules::discretize(cervical[, 8], method = "cluster", breaks = 4, onlycuts = T) | ||
# IUD | ||
bins[[9]]$doDiscretize <- F | ||
# IUD..years. | ||
bins[[10]]$cuts <- arules::discretize(cervical[, 10], method = "cluster", breaks = 4, onlycuts = T) | ||
# STDs | ||
bins[[11]]$doDiscretize <- F | ||
# STDs..number. | ||
bins[[12]]$cuts <- c(0, 1) | ||
# STDs..Number.of.diagnosis | ||
bins[[13]]$cuts <- c(0, 1) | ||
# STDs..Time.since.first.diagnosis | ||
bins[[14]]$cuts <- c(0, 3) | ||
# STDs..Time.since.last.diagnosis | ||
bins[[15]]$cuts <- c(0, 3) | ||
|
||
|
||
# Explain model with anchors | ||
explainer = anchors(cervical, model, perturbator, discX = discCervical) | ||
explainer = anchors(cervical, model, bins = bins) | ||
|
||
explanations = explain(cervical[1:2,], explainer) | ||
explanations = explain(cervical[3,], explainer) | ||
|
||
printExplanations(explainer, explanations) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters