I'm using the caret function train() in one of my projects and I'd like to add a custom metric, the F1-score. I looked at the caret package documentation but I cannot understand how to build a score that is available to the metric parameter.
There is an example of a custom metric in the documentation:
## Example of a custom metric
library(caret)
library(mlbench)   # for the BostonHousing data
data(BostonHousing)

madSummary <- function(data, lev = NULL, model = NULL) {
  out <- mad(data$obs - data$pred, na.rm = TRUE)
  names(out) <- "MAD"
  out
}

robustControl <- trainControl(summaryFunction = madSummary)
marsGrid <- expand.grid(degree = 1, nprune = (1:10) * 2)

earthFit <- train(medv ~ ., data = BostonHousing,
                  method = "earth",
                  tuneGrid = marsGrid,
                  metric = "MAD",
                  maximize = FALSE,
                  trControl = robustControl)
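As I understand the docs, the contract such a summary function has to satisfy is: take data (a data frame with obs and pred columns, plus one class-probability column per level when classProbs = TRUE), lev (the outcome factor levels, NULL for regression), and model, and return a named numeric vector whose name matches what is later passed to train()'s metric argument. A minimal skeleton, with placeholder names of my own:

## Skeleton of a caret summaryFunction (mySummary is a placeholder name)
mySummary <- function(data, lev = NULL, model = NULL) {
  # data: data frame holding at least `obs` and `pred` for the resample
  # lev:  levels of the outcome factor (NULL for regression)
  score <- mean(data$obs == data$pred)   # e.g. plain accuracy
  c(Accuracy = score)                    # the name is what `metric` refers to
}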
Update:

I tried your code, but the problem is that it doesn't work with multiple classes, as in the code below (the F1 score is displayed, but it is weird). I'm not sure, but I think the function F1_Score only works on binary classes.
library(caret)
library(MLmetrics)

set.seed(346)
dat <- iris

## See http://topepo.github.io/caret/training.html#metrics
f1 <- function(data, lev = NULL, model = NULL) {
  print(data)
  f1_val <- F1_Score(y_pred = data$pred, y_true = data$obs)
  c(F1 = f1_val)
}

# Split the data: 70% for training
in_train   <- createDataPartition(dat$Species, p = .70, list = FALSE)
trainClass <- dat[ in_train, ]
testClass  <- dat[-in_train, ]

set.seed(35)
mod <- train(Species ~ ., data = trainClass,
             method = "rpart",
             metric = "F1",
             trControl = trainControl(summaryFunction = f1,
                                      classProbs = TRUE))
print(mod)
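One way I could work around this, if F1_Score really is binary-only, would be a summary function that computes a one-vs-all F1 per class and macro-averages the results itself. Below is a minimal sketch under that assumption (the name macroF1 and the averaging scheme are mine, not from caret or MLmetrics):

## Sketch of a macro-averaged F1 summary function for multiclass problems.
## Relies on `lev` holding all class levels, as caret passes it.
macroF1 <- function(data, lev = NULL, model = NULL) {
  # One-vs-all F1 for each class, then the average across classes
  f1_per_class <- sapply(lev, function(cls) {
    tp <- sum(data$pred == cls & data$obs == cls)
    fp <- sum(data$pred == cls & data$obs != cls)
    fn <- sum(data$pred != cls & data$obs == cls)
    precision <- tp / (tp + fp)
    recall    <- tp / (tp + fn)
    2 * precision * recall / (precision + recall)
  })
  c(F1 = mean(f1_per_class, na.rm = TRUE))
}

## Used the same way as above:
## trControl = trainControl(summaryFunction = macroF1, classProbs = TRUE)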
I also coded a manual F1 score, with one input, the confusion matrix (I'm not sure whether we can have a confusion matrix inside a summaryFunction):
f1_score <- function(mat, algoName) {
  ##
  ## Compute the F1-score from a confusion matrix
  ##
  # Remark: rows = predictions // columns = real values
  precision <- matrix(NA, ncol = nrow(mat))
  recall    <- matrix(NA, ncol = nrow(mat))
  f1_score  <- matrix(NA, ncol = nrow(mat))

  for (i in 1:nrow(mat)) {
    precision[i] <- mat[i, i] / rowSums(mat)[i]  # TP / all predicted as class i
    recall[i]    <- mat[i, i] / colSums(mat)[i]  # TP / all truly in class i
  }
  for (i in 1:ncol(f1_score)) {
    f1_score[i] <- 2 * (precision[i] * recall[i]) / (precision[i] + recall[i])
  }

  # Label the result matrix
  colnames(f1_score) <- colnames(mat)
  rownames(f1_score) <- algoName

  # Show the per-class F1 scores, then return the average F1-score
  print(f1_score)
  mean(f1_score[1, ])
}
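For reference, here is a quick call of this function on a confusion matrix built with table() (the predictions and observations below are invented purely for illustration):

## Hypothetical usage: build a confusion matrix with table() and score it
pred <- factor(c("a", "a", "b", "c", "c", "b"), levels = c("a", "b", "c"))
obs  <- factor(c("a", "b", "b", "c", "a", "b"), levels = c("a", "b", "c"))
mat  <- table(pred, obs)   # rows = predictions, columns = real values
f1_score(mat, "rpart")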
You should look at the caret documentation on alternate performance metrics for the details. A working example:
library(caret)
library(MLmetrics)

set.seed(346)
dat <- twoClassSim(200)

## See http://topepo.github.io/caret/training.html#metrics
f1 <- function(data, lev = NULL, model = NULL) {
  f1_val <- F1_Score(y_pred = data$pred, y_true = data$obs, positive = lev[1])
  c(F1 = f1_val)
}

set.seed(35)
mod <- train(Class ~ ., data = dat,
             method = "rpart",
             tuneLength = 5,
             metric = "F1",
             trControl = trainControl(summaryFunction = f1,
                                      classProbs = TRUE))
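If it helps, after the fit you can check that F1 actually drove the tuning (assuming the mod object from above):

print(mod)       # resampling summary; F1 is the selection metric
mod$results      # resampled F1 for each candidate cp value
mod$bestTune     # the cp value with the best mean F1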
Max