Skip to content

Commit

Permalink
Merge pull request #935 from trvinh/master
Browse files Browse the repository at this point in the history
class2tree checks duplicated taxa in higher levels, resolve #934
  • Loading branch information
zachary-foster authored Sep 4, 2024
2 parents 889072f + 0cbb9c3 commit fe1ec55
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 2 deletions.
30 changes: 30 additions & 0 deletions R/class2tree.R
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,36 @@ rank_indexing <- function (rankList) {
#' @author Vinh Tran {[email protected]}
taxonomy_table_creator <- function (nameList, rankList) {
colnames(nameList)[1] <- "tip"
# remove duplicated taxa (e.g. taxa with higher levels, that already belong
# to the taxonomy string of other taxa)
duplicatedTaxa <- lapply(
nameList$X1,
function (x) {
matchs <-data.frame(which(nameList[,-1] == x, arr.ind=TRUE))
checkHigherRank <- lapply(
matchs$row,
function(y) {
if (nameList[y,]$X1 != x) return(1)
}
)
if (length(unlist(checkHigherRank)) > 0)
return(strsplit(x, "#", fixed = TRUE)[[1]][1])
}
)
if (length(unlist(duplicatedTaxa)) > 0) {
msg <- paste("WARNING:", length(unlist(duplicatedTaxa)))
if (length(unlist(duplicatedTaxa)) == 1)
msg <- paste(msg, "duplicated taxon has been ignored!")
else
msg <- paste(msg, "duplicated taxa have been ignored!")
if (length(unlist(duplicatedTaxa)) < 5) {
msg <- paste(msg, "Including: ")
msg <- c(msg, paste(unlist(duplicatedTaxa), collapse = "; "))
}
message(msg)
}
nameList <- nameList[!(nameList$tip %in% unlist(duplicatedTaxa)),]
rankList <- rankList[!(rankList$tip %in% unlist(duplicatedTaxa)),]
# get indexed rank list
index2RankDf <- rank_indexing(rankList)
# get ordered rank list
Expand Down
14 changes: 12 additions & 2 deletions tests/testthat/test-class2tree.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ spnames <- c("Klattia flava", "Trollius sibiricus", "Arachis paraguariensis",
dupnames <- c("Mus musculus", "Escherichia coli",
"Haloferax denitrificans", "Mus musculus")


duptaxa <- c("Haliotis", "Haliotis cracherodii", "Haliotis rufescens",
"Megabalanus californicus")

test_that("internal functions of class2tree", {
skip_on_cran() # uses secrets
Expand Down Expand Up @@ -76,11 +77,20 @@ test_that("class2tree returns the correct value and class", {
anyDuplicated(gsub("\\.\\d+$", "", names(tr$classification))), 0)
})

test_that("class2tree will abort when input contains duplicate taxa", {
test_that("class2tree will abort when input contains duplicated taxa", {
skip_on_cran() # uses secrets
vcr::use_cassette("class2tree_classification_dup_call", {
out <- classification(dupnames, db = "ncbi", messages = FALSE)
})
expect_error(class2tree(out),
"Input list of classifications contains duplicates")
})

test_that("class2tree detects duplicated taxa in higher levels", {
skip_on_cran() # uses secrets
vcr::use_cassette("class2tree_classification_dup_high_level", {
out <- classification(duptaxa, db = "ncbi", messages = FALSE)
})
tree <- class2tree(out)
expect_true(nrow(tree$classification) < length(duptaxa))
})

0 comments on commit fe1ec55

Please sign in to comment.