diff --git a/DESCRIPTION b/DESCRIPTION
index 918d0a4..225ed2b 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: HMDA
 Type: Package
 Title: Holistic Multimodel Domain Analysis for Exploratory Machine Learning
-Version: 0.1
+Version: 0.2.0
 Authors@R: 
     person("E. F. Haghish",
            role = c("aut", "cre", "cph"),
@@ -24,5 +24,5 @@ License: MIT + file LICENSE
 Encoding: UTF-8
 RoxygenNote: 7.3.2
 LazyData: true
-URL: http://dx.doi.org/10.13140/RG.2.2.32473.63846, https://github.com/haghish/HMDA, https://www.sv.uio.no/psi/english/people/academic/haghish/
+URL: http://dx.doi.org/10.13140/RG.2.2.32473.63846, https://github.com/haghish/HMDA
 BugReports: https://github.com/haghish/HMDA/issues
diff --git a/NAMESPACE b/NAMESPACE
index 95c9479..7806bcf 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -5,6 +5,7 @@ export(dictionary)
 export(hmda.adjust.params)
 export(hmda.autoEnsemble)
 export(hmda.best.models)
+export(hmda.compare.shap.plot)
 export(hmda.domain)
 export(hmda.efa)
 export(hmda.feature.selection)
@@ -12,8 +13,11 @@ export(hmda.grid)
 export(hmda.grid.analysis)
 export(hmda.init)
 export(hmda.partition)
+export(hmda.plot)
+export(hmda.plot.metrics)
 export(hmda.search.param)
 export(hmda.suggest.param)
+export(hmda.test)
 export(hmda.wmshap)
 export(hmda.wmshap.table)
 export(suggest_mtries)
@@ -70,6 +74,9 @@ importFrom(psych,factor.scores)
 importFrom(psych,omega)
 importFrom(shapley,shapley)
 importFrom(shapley,shapley.domain)
+importFrom(shapley,shapley.domain.test)
+importFrom(shapley,shapley.feature.test)
+importFrom(shapley,shapley.plot)
 importFrom(stats,aggregate)
 importFrom(stats,cor)
 importFrom(stats,formula)
diff --git a/R/hmda.best.models.R b/R/hmda.best.models.R
index 137c792..ad171fc 100644
--- a/R/hmda.best.models.R
+++ b/R/hmda.best.models.R
@@ -73,17 +73,30 @@
 #' @importFrom utils head
 #' @export
 #' @author E. F. Haghish
-hmda.best.models <- function(df, n_models = 1) {
+hmda.best.models <- function(df,
+                             n_models = NULL,
+                             metrics = c("logloss", "mae", "mse", "rmse", "rmsle",
+                                         "mean_per_class_error", "auc", "aucpr",
+                                         "r2", "accuracy", "f1", "mcc", "f2"),
+                             distance_percentage = NULL,
+                             hyperparam = FALSE
+) {
 
   if (!inherits(df, "hmda.grid.analysis")) {
     warning("'df' is not of class 'hmda.grid.analysis'... watch out!")
   }
 
-  # Define known performance metrics and their optimization directions
-  known_metrics <- c("logloss", "mae", "mse", "rmse", "rmsle",
-                     "mean_per_class_error", "auc", "aucpr",
-                     "r2", "accuracy", "f1", "mcc", "f2")
+  if (!is.null(n_models) & !is.null(distance_percentage)) {
+    stop("either specify 'n_models' or 'distance_percentage'")
+  }
+  else if (is.null(n_models) & is.null(distance_percentage)) {
+    n_models <- 1
+  }
 
+  result <- NULL
+  sort_direction_list <- NULL
+
+  # Define known performance metrics and their optimization directions
   directions <- c(
     logloss = "minimize",
     mae = "minimize",
@@ -100,6 +113,24 @@ hmda.best.models <- function(df, n_models = 1) {
     f2 = "maximize"
   )
 
+  sort_direction <- c(
+    logloss = FALSE,
+    mae = FALSE,
+    mse = FALSE,
+    rmse = FALSE,
+    rmsle = FALSE,
+    mean_per_class_error = FALSE,
+    auc = TRUE,
+    aucpr = TRUE,
+    r2 = TRUE,
+    accuracy = TRUE,
+    f1 = TRUE,
+    mcc = TRUE,
+    f2 = TRUE
+  )
+
+  #directions <- directions[directions %in% metrics]
+
   # Check for the existence of the model_ids column
   if (!"model_ids" %in% names(df)) {
     stop("The data frame must contain a 'model_ids' column.")
@@ -109,35 +140,75 @@ hmda.best.models <- function(df, n_models = 1) {
   best_model_ids <- c()
 
   # Loop over each known metric and determine the top n_models best model IDs
-  for (metric in known_metrics) {
-    if (metric %in% names(df)) {
-      vals <- df[[metric]]
+  for (met in metrics) {
+    if (met %in% names(df)) {
+      vals <- df[[met]]
       if (all(is.na(vals))) next  # Skip metric if all values are NA
 
-      dir <- directions[[metric]]
+      dir <- directions[[met]]
       if (is.null(dir)) dir <- "minimize"
 
       # Order indices according to performance (top n_models)
       if (dir == "maximize") {
         ordered_idx <- order(vals, decreasing = TRUE)
+        sort_direction_list <- c(sort_direction_list, TRUE)
       } else {
         ordered_idx <- order(vals, decreasing = FALSE)
+        sort_direction_list <- c(sort_direction_list, FALSE)
       }
 
-      top_idx <- head(ordered_idx, n_models)
-      best_model_ids <- c(best_model_ids, df$model_ids[top_idx])
+      # Select the models for each metric
+      if (!is.null(n_models)) {
+        top_idx <- head(ordered_idx, n_models)
+        best_model_ids <- c(best_model_ids, df$model_ids[top_idx])
+      }
+      else if (!is.null(distance_percentage)) {
+        if (dir == "maximize") {
+          best_value <- max(df[[met]], na.rm = TRUE)
+          best_model_ids <- c(best_model_ids, df$model_ids[which(df[[met]] >= best_value * distance_percentage)])
+        }
+        else {
+          best_value <- min(df[[met]], na.rm = TRUE)
+          best_model_ids <- c(best_model_ids, df$model_ids[which(df[[met]] <= best_value * (1-distance_percentage))])
+        }
+      }
     }
   }
 
-  # Get the unique union of best model IDs
-  best_model_ids <- unique(best_model_ids)
+  # # Get the unique union of best model IDs
+  # best_model_ids <- unique(best_model_ids)
 
   # Determine which known metric columns exist in df
-  existing_metrics <- intersect(known_metrics, names(df))
+  existing_metrics <- intersect(metrics, names(df))
+
+  # print(df[df$model_ids %in% best_model_ids, c("model_ids", existing_metrics), drop = FALSE])
 
   # Subset the original data frame for the best models and only include
   # the model_ids and the performance metric columns.
-  result <- df[df$model_ids %in% best_model_ids, c("model_ids", existing_metrics), drop = FALSE]
+  if (hyperparam) result <- df[df$model_ids %in% best_model_ids, , drop = FALSE]
+  else result <- df[df$model_ids %in% best_model_ids, c("model_ids", existing_metrics), drop = FALSE]
+
+
+  # sort the columns for all the existing_metrics columns in order
+  # ==========================================================
+  # result <- result[with(result, order(existing_metrics)), ]
+  order_by_cols <- function(df, cols, decreasing = FALSE, na.last = TRUE) {
+    stopifnot(all(cols %in% names(df)))
+    if (length(decreasing) == 1L) decreasing <- rep(decreasing, length(cols))
+    stopifnot(length(decreasing) == length(cols))
+
+    keys <- Map(function(col, dec) {
+      k <- xtfrm(df[[col]])     # numeric ranking for any type
+      if (dec) -k else k
+    }, cols, decreasing)
+
+    ord <- do.call(order, c(keys, list(na.last = na.last)))
+    df[ord, , drop = FALSE]
+  }
+  result <- order_by_cols(result, existing_metrics, decreasing = sort_direction_list)
+  # ord <- do.call(order, c(as.list(result[existing_metrics]), list(na.last = TRUE)))
+  # result <- result[ord, , drop = FALSE]
+
 
   # Drop metric columns that are entirely NA in the resulting subset
   for (col in existing_metrics) {
@@ -148,3 +219,6 @@ hmda.best.models <- function(df, n_models = 1) {
 
   return(result)
 }
+
+
+
diff --git a/R/hmda.compare.shap.plot.R b/R/hmda.compare.shap.plot.R
new file mode 100644
index 0000000..b903024
--- /dev/null
+++ b/R/hmda.compare.shap.plot.R
@@ -0,0 +1,124 @@
+
+
+#' @importFrom shapley shapley shapley.plot
+#'
+#' @export
+
+hmda.compare.shap.plot <- function(hmda.grid.analysis,
+                                   newdata = train.hex,
+                                   model_id = NULL,
+                                   metrics = c("aucpr", "mcc", "f2"),
+                                   plot = "shap",
+                                   top_n_features = 4) {
+
+  # store the plots
+  plot_list <- list()
+  MODEL_IDS   <- NULL
+  MODEL_NAMES <- NULL
+  n = 0
+
+  # get the model ids
+  if (is.null(model_id)) {
+    for (m in metrics) {
+      MODEL_IDS  <- c(MODEL_IDS, hmda.best.models(hmda.grid.analysis, n_models = 1, metrics = m)$model_ids)
+      MODEL_NAMES<-c(MODEL_NAMES, paste("\nHighest", toupper(m)))
+    }
+  }
+  else {
+    MODEL_IDS <- model_id
+    for (m in seq(length(MODEL_IDS))) {
+      MODEL_NAMES<-c(MODEL_NAMES, paste("\nModel", m))
+    }
+  }
+
+  for (id in MODEL_IDS) {
+    n <- n + 1
+
+    # get the model
+    # ============================================================
+    model <- h2o.getModel(id)
+
+    if (plot == "shap") {
+      p <- h2o.shap_summary_plot(model,
+                                 newdata,
+                                 top_n = top_n_features) +
+
+        scale_y_continuous(limits = c(-1, 1),
+                           breaks = seq(-1, 1, by = 0.5)) +
+
+        ggtitle(MODEL_NAMES[n]) +
+        xlab("") +
+        ylab("") +
+        theme_classic() +
+        labs(colour = "Normalized values\n") +
+        theme(
+          legend.position="none",
+          legend.justification = "right",
+          legend.title.align = 0.5,
+          legend.direction = "horizontal",
+          legend.text=element_text(colour="black", size=6, face="bold"),
+          plot.title = element_text(size = 10),
+          legend.key.height = grid::unit(0.3, "cm"),
+          legend.key.width = grid::unit(1, "cm"),
+          #legend.margin=margin(grid::unit(0,"cm")),
+          legend.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "cm"),
+          #plot.margin = margin(t = -0.3, r = .25, b = .25, l = .25, unit = "cm")  # Reduce top plot margin
+        )
+    }
+
+    if (plot == "bar") {
+      resultsnow <- shapley(
+        n_models = 1,
+        standardize_performance_metric = TRUE, #for a single model
+        models = id,
+        newdata = newdata,
+        top_n_features = top_n_features,
+        plot = FALSE)
+
+      p <- shapley.plot(resultsnow, top_n_features = top_n_features)
+
+      p <- p +
+        ggtitle(MODEL_NAMES[n]) +
+        xlab("") +
+        ylab("")
+      #   theme_classic() +
+      theme(
+        #     legend.position="none",
+        #     legend.justification = "right",
+        #     legend.title.align = 0.5,
+        #     legend.direction = "horizontal",
+        #     legend.text=element_text(colour="black", size=6, face="bold"),
+        plot.title = element_text(size = 10),
+        #     legend.key.height = grid::unit(0.3, "cm"),
+        #     legend.key.width = grid::unit(1, "cm"),
+        #     #legend.margin=margin(grid::unit(0,"cm")),
+        #     legend.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "cm"),
+        #     #plot.margin = margin(t = -0.3, r = .25, b = .25, l = .25, unit = "cm")  # Reduce top plot margin
+      )
+    }
+
+    # save the plot in the list
+    plot_list[[n]] <- p
+  }
+
+
+  # COMPARE PLOTS
+  # ============================================================
+  library(gridExtra)
+  combined <- grid.arrange(grobs = plot_list,
+                           nrow = 1,
+                           left = "Top 5 features",
+                           bottom = "Comparison of SHAP contributions of top 5 features across three GBM models"
+  )
+
+  print(combined)
+  return(combined)
+}
+
+# print(compare.shap.plot(hmda.grid.analysis = grid_analysis,
+#                         newdata = splits$hmda.test.hex,
+#                         metrics = c("aucpr", "mcc", "f2"),
+#                         plot = "bar",
+#                         top_n_features = 5))
+
+
diff --git a/R/hmda.domain.R b/R/hmda.domain.R
index a0f8aa1..8854f53 100644
--- a/R/hmda.domain.R
+++ b/R/hmda.domain.R
@@ -1,6 +1,6 @@
 #' @title compute and plot weighted mean SHAP contributions at group level (factors or domains)
 #' @description This function applies different criteria to visualize SHAP contributions
-#' @param shapley object of class 'shapley', as returned by the 'shapley' function
+#' @param wmshap object of class 'shapley', as returned by the 'shapley' function
 #' @param plot character, specifying the type of the plot, which can be either
 #'            'bar', 'waffle', or 'shap'. The default is 'bar'.
 #' @param domains character list, specifying the domains for grouping the features'
@@ -93,7 +93,7 @@
 #'                  Group2 = c("x25", "x23", "x6", "x27"),
 #'                  Group3 = c("x28", "x26"))
 #'
-#'   hmda.domain(shapley = wmshap,
+#'   hmda.domain(wmshap = wmshap,
 #'               plot = "bar",
 #'               domains = domains,
 #'               print = TRUE)
@@ -102,21 +102,23 @@
 #' @author E. F. Haghish
 
 
-hmda.domain <- function(shapley,
+hmda.domain <- function(wmshap,
                         domains,
                         plot = "bar",
                         legendstyle = "continuous",
                         scale_colour_gradient = NULL, #this is a BUG because it is not implemented
                         # COLORCODE IS MISSING :(
-                        print = FALSE) {
+                        print = FALSE,
+                        xlab = "Factors") {
 
   return(
-    shapley.domain(shapley = shapley,
+    shapley.domain(shapley = wmshap,
              domains = domains,
              plot = plot,
              legendstyle = legendstyle,
              scale_colour_gradient = scale_colour_gradient, #this is a BUG because it is not implemented
              # COLORCODE IS MISSING :(
-             print = print)
+             print = print,
+             xlab = xlab)
   )
 }
diff --git a/R/hmda.efa.R b/R/hmda.efa.R
index 02316fa..5b16ca0 100644
--- a/R/hmda.efa.R
+++ b/R/hmda.efa.R
@@ -159,7 +159,7 @@ hmda.efa <- function(df,
     #   message(paste(pa$value$nfact, "factors are recommended, but there might be better solutions (for example, up to", sum(pa$value$fa.values > 0), "factors)"))
     # }
     # else message(paste(pa$value$nfact, "factors are recommended"))
-    message(paste(pa$value$nfact, "factors are recommended"))
+    message(paste(pa$value$nfact, "factors are recommended\n\n==============================\n\n"))
   }
 
   # Run the exploratory factor analysis
@@ -169,10 +169,6 @@ hmda.efa <- function(df,
                           fm = algorithm,
                           rotate = rotation)
 
-
-
-  loadings <- EFAresults$loadings
-  #
   # EFAresults <- factanal(~ .,
   #                        data = df[, features],
   #                        factors = if (!is.null(nfactors)) nfactors else pa$value$nfact,
diff --git a/R/hmda.grid.R b/R/hmda.grid.R
index 19dfa98..4721665 100644
--- a/R/hmda.grid.R
+++ b/R/hmda.grid.R
@@ -86,7 +86,7 @@
 #'                           nfolds = 10,
 #'                           ntrees = 100,
 #'                           seed = 1,
-#'                           hyper_params = gbm_params1)
+#'                           hyper_params = params)
 #'
 #'   # Assess the performances of the models
 #'   grid_performance <- hmda.grid.analysis(hmda_grid1)
diff --git a/R/hmda.grid.analysis.R b/R/hmda.grid.analysis.R
index cfd4bae..2c56378 100644
--- a/R/hmda.grid.analysis.R
+++ b/R/hmda.grid.analysis.R
@@ -87,7 +87,8 @@
 hmda.grid.analysis <- function(grid,
                                performance_metrics =
                                  c("logloss", "mse", "rmse", "rmsle", "auc",
-                                   "aucpr", "mean_per_class_error", "r2"),
+                                   "aucpr", "mean_per_class_error", "r2",
+                                   "f1","f2","mcc","kappa"),
                                sort_by = "logloss") {
 
   # prepare the performance dataset
@@ -118,12 +119,13 @@ hmda.grid.analysis <- function(grid,
       else if (j == "rmse") try(performance[i, j] <- h2o.rmse(MODEL, xval = TRUE), silent = TRUE)
 
       # for thresholds metrics...
-      else if (j %in% c("f1","f2","mcc","kappa")) {
+      else if (j %in% c("mean_per_class_error","f2","mcc","kappa")) {
         PERF <- h2o.performance(model = MODEL, xval= TRUE)
-        if (j == "f1") try(performance[i, j] <- h2o.F1(PERF), silent = TRUE)
-        else if (j == "f2") try(performance[i, j] <- h2o.F2(PERF), silent = TRUE)
-        else if (j == "mcc") try(performance[i, j] <- h2o.mcc(PERF), silent = TRUE)
-        else if (j == "kappa") try(performance[i, j] <- h2otools::kappa(PERF), silent = TRUE)
+        # if (j == "f1") try(performance[i, j] <- h2o.F1(PERF), silent = TRUE)
+        if (j == "mean_per_class_error") try(performance[i, j] <- PERF@metrics$mean_per_class_error, silent = TRUE)
+        else if (j == "f2") try(performance[i, j] <- PERF@metrics$max_criteria_and_metric_scores[2,3], silent = TRUE)
+        else if (j == "mcc") try(performance[i, j] <- PERF@metrics$max_criteria_and_metric_scores[8,3], silent = TRUE)
+        else if (j == "kappa") try(performance[i, j] <- h2otools::kappa(PER, max=TRUE), silent = TRUE)
       }
     }
   }
diff --git a/R/hmda.plot.R b/R/hmda.plot.R
new file mode 100644
index 0000000..1b4409e
--- /dev/null
+++ b/R/hmda.plot.R
@@ -0,0 +1,104 @@
+#' @title Plot WMSHAP contributions
+#' @description This function applies different criteria to visualize WMSHAP contributions
+#' @param shapley object of class 'shapley', as returned by the 'shapley' function
+#' @param plot character, specifying the type of the plot, which can be either
+#'            'bar', 'waffle', or 'shap'. The default is 'bar'.
+#' @param method Character. The column name in \code{summaryShaps} used
+#'                           for feature selection. Default is \code{"mean"}, which
+#'                           selects important features which have weighted mean shap
+#'                           ratio (WMSHAP) higher than the specified cutoff. Other
+#'                           alternative is "lowerCI", which selects features which
+#'                           their lower bound of confidence interval is higher than
+#'                           the cutoff.
+#' @param cutoff numeric, specifying the cutoff for the method used for selecting
+#'               the top features.
+#' @param top_n_features Integer. If specified, the top n features with the
+#'                       highest weighted SHAP values will be selected, overrullung
+#'                       the 'cutoff' and 'method' arguments.
+#' @param features character vector, specifying the feature to be plotted.
+#' @param legendstyle character, specifying the style of the plot legend, which
+#'                    can be either 'continuous' (default) or 'discrete'. the
+#'                    continuous legend is only applicable to 'shap' plots and
+#'                    other plots only use 'discrete' legend.
+#' @param scale_colour_gradient character vector for specifying the color gradients
+#'                              for the plot.
+#' @param labels provide feature labels for items in the dataset. If specified,
+#'               feature descriptions will be shown in the plot instead of the
+#'               feature names as included in the dataset. To specify the labels,
+#'               use the \code{c} function and list the labes, e.g.,
+#'               \code{c(feature1 = label2, feature2 = label2, ...)}.
+# @importFrom waffle waffle
+#' @importFrom h2o h2o.shap_summary_plot h2o.getModel
+#' @importFrom ggplot2 scale_colour_gradient2 theme guides guide_legend guide_colourbar
+#'             margin element_text theme_classic labs ylab xlab ggtitle
+#' @author E. F. Haghish
+#' @return ggplot object
+#' @examples
+#'
+#' \dontrun{
+#' # load the required libraries for building the base-learners and the ensemble models
+#' library(h2o)            #shapley supports h2o models
+#' library(shapley)
+#'
+#' # initiate the h2o server
+#' h2o.init(ignore_config = TRUE, nthreads = 2, bind_to_localhost = FALSE, insecure = TRUE)
+#'
+#' # upload data to h2o cloud
+#' prostate_path <- system.file("extdata", "prostate.csv", package = "h2o")
+#' prostate <- h2o.importFile(path = prostate_path, header = TRUE)
+#'
+#' ### H2O provides 2 types of grid search for tuning the models, which are
+#' ### AutoML and Grid. Below, I demonstrate how weighted mean shapley values
+#' ### can be computed for both types.
+#'
+#' set.seed(10)
+#'
+#' #######################################################
+#' ### PREPARE AutoML Grid (takes a couple of minutes)
+#' #######################################################
+#' # run AutoML to tune various models (GBM) for 60 seconds
+#' y <- "CAPSULE"
+#' prostate[,y] <- as.factor(prostate[,y])  #convert to factor for classification
+#' aml <- h2o.automl(y = y, training_frame = prostate, max_runtime_secs = 120,
+#'                  include_algos=c("GBM"),
+#'
+#'                  # this setting ensures the models are comparable for building a meta learner
+#'                  seed = 2023, nfolds = 10,
+#'                  keep_cross_validation_predictions = TRUE)
+#'
+#' ### call 'shapley' function to compute the weighted mean and weighted confidence intervals
+#' ### of SHAP values across all trained models.
+#' ### Note that the 'newdata' should be the testing dataset!
+#' result <- shapley(models = aml, newdata = prostate, plot = TRUE)
+#'
+#' #######################################################
+#' ### PLOT THE WEIGHTED MEAN SHAP VALUES
+#' #######################################################
+#'
+#' shapley.plot(result, plot = "bar")
+# shapley.plot(result, plot = "waffle")
+#' }
+#' @export
+
+hmda.plot <- function(wmshap,
+                         plot = "bar",
+                         method = "mean",
+                         cutoff = 0.01,
+                         top_n_features = NULL,
+                         features = NULL,
+                         legendstyle = "continuous",
+                         scale_colour_gradient = NULL,
+                         labels = NULL) {
+
+  shapley.plot(wmshap,
+           plot = plot,
+           method = method,
+           cutoff = cutoff,
+           top_n_features = top_n_features,
+           features = features,
+           legendstyle = legendstyle,
+           scale_colour_gradient = scale_colour_gradient,
+           labels = labels)
+}
+
+
diff --git a/R/hmda.plot.metrics.R b/R/hmda.plot.metrics.R
new file mode 100644
index 0000000..8bcc1e2
--- /dev/null
+++ b/R/hmda.plot.metrics.R
@@ -0,0 +1,128 @@
+
+
+
+
+#' @export
+#' @author E. F. Haghish
+
+# plot the AUCPR, MCC, and F2 values on a line with ggplot2
+hmda.plot.metrics <- function(df,
+                                metrics,
+                                criteria = "top_models",
+                                top_models = 100,
+                                distance_percentage = 0.99,
+                                plot = TRUE) {
+  library(reshape2)
+  library(ggplot2)
+  IDS  <- NULL
+  # range  <- NULL
+  # min <- NULL
+  # max <- NULL
+
+  # subset models
+  # ============================================================
+  if (criteria == "top_models") {
+    df <- df[1:top_models, ]
+    # range <- c(min(df[, metrics]), max(df[, metrics]))
+  }
+  else if (criteria == "distance_percentage") {
+
+    # for each metric get the best model performance
+    for (met in metrics) {
+      best_value <- max(df[[met]], na.rm = TRUE)
+      IDS <- c(IDS, df$model_ids[which(df[[met]] >= best_value * distance_percentage)])
+      # range <- c(range, best_value, best_value * distance_percentage)
+    }
+
+    IDS <- unique(IDS)
+    df <- df[df$model_ids %in% IDS, ]
+  }
+
+  # # round range to go to closest .05
+  # min <- floor(range[1] * 20) / 20
+  # max <- ceiling(range[2] * 20) / 20
+
+  # change the IDs to index
+  grid_legnth <- nrow(df)
+  df$model_ids <- as.integer(seq(1, grid_legnth))
+
+
+  melted <- melt(df, id.vars = "model_ids", measure.vars = metrics)
+
+
+  trends_plot <- ggplot(melted, aes(x = model_ids, y = value, color = variable)) +
+    scale_color_brewer(palette = "Set1") +
+    geom_line(size = 1, alpha = .3) +
+    geom_point(size = 2, alpha = .5) +
+    scale_x_continuous(breaks = seq(0, grid_legnth, by = 10)) +
+    scale_y_continuous(#limits = c(min, max) , breaks = seq(0.6, 0.8, by = 0.1), expand = c(0, 0)
+    ) +
+    labs(x = "\nIndex of models (sorted by AUCPR)",
+         y = "Performance metrics\n",
+         color = "") +
+    #ggtitle("Comparison of AUCPR, MCC, and F2 across top 100 GBM models") +
+    theme_classic() +
+    theme(
+      legend.position="top",
+      legend.justification = "right",
+      legend.title.align = 0.5,
+      legend.direction = "horizontal",
+      legend.text=element_text(colour="black", size=8, face="bold"),
+      plot.title = element_text(size = 12),
+      legend.key.height = grid::unit(0.4, "cm"),
+      legend.key.width = grid::unit(1.2, "cm"),
+      #legend.margin=margin(grid::unit(0,"cm")),
+      legend.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "cm"),
+      #plot.margin = margin(t = 0.3, r = .3, b = .25, l = .2, unit = "cm")  # Reduce top plot margin
+    )
+
+  # get the colors used in the plot
+  #plot_colors <- scales::hue_pal()(length(metrics))
+
+  # make a loop for "metrics", get the best value of each metric, and plot the circle with a higher-alpha color
+  for (i in seq_along(metrics)) {
+    m <- metrics[i]
+    best_metric_index <- which.max(df[[m]])
+    trends_plot <- trends_plot +
+      geom_point(data = melted[melted$model_ids == best_metric_index & melted$variable == m, ], size = 2, alpha = 1) +
+      # geom_point(data = melted[melted$model_ids == best_metric_index & melted$variable == m, ],
+      #            aes(x = model_ids, y = value),
+      #            #color = "black",
+      #            size = 3,
+      #            shape = 21,
+      #            #fill = "yellow"
+      # ) +
+      geom_vline(xintercept = best_metric_index,
+                 linetype="dashed",
+                 # add color black with alpha set to 0.5
+                 #color = "red",
+                 alpha = 0.15
+
+      ) #+
+    # annotate("text", x = best_metric_index + 5, y = melted$value[melted$model_ids == best_metric_index & melted$variable == m],
+    #          label = paste("Best", toupper(m)),
+    #          angle = 90, vjust = -0.5, size = 3)
+  }
+
+  # make a loop for "metrics" and print the vertical line for all of them
+  # for (i in seq_along(metrics)) {
+  #   m <- metrics[i]
+  #   best_metric_index <- which.max(df[[m]])
+  #   trends_plot <- trends_plot +
+  #     geom_vline(xintercept = best_metric_index,
+  #                linetype="dashed",
+  #                #color = plot_colors[i]
+  #                ) +
+  #     annotate("text", x = best_metric_index + 5, y = 0.6 + (i - 1) * 0.05,
+  #              label = paste("Best", toupper(m)),
+  #              #color = plot_colors[i],
+  #              angle = 90, vjust = -0.5, size = 3)
+  # }
+
+  if (plot) print(trends_plot)
+
+  return(plot)
+}
+
+
+
diff --git a/R/hmda.test.R b/R/hmda.test.R
new file mode 100644
index 0000000..3f4c77b
--- /dev/null
+++ b/R/hmda.test.R
@@ -0,0 +1,81 @@
+#' @title Normalize a vector based on specified minimum and maximum values
+#' @description This function normalizes a vector based on specified minimum
+#'              and maximum values. If the minimum and maximum values are not
+#'              specified, the function will use the minimum and maximum values
+#'              of the vector.
+#' @param wmshap object of class 'shapley', as returned by the 'shapley' function
+#' @param features character, name of two features to be compared with permutation test
+#' @param domains list of two character vectors, each including a number of features.
+#' @param n integer, number of permutations
+#' @importFrom shapley shapley.feature.test shapley.domain.test
+#' @author E. F. Haghish
+#' @return normalized numeric vector
+#' @examples
+#'
+#' \dontrun{
+#' # load the required libraries for building the base-learners and the ensemble models
+#' library(h2o)            #shapley supports h2o models
+#' library(autoEnsemble)   #autoEnsemble models, particularly useful under severe class imbalance
+#' library(shapley)
+#'
+#' # initiate the h2o server
+#' h2o.init(ignore_config = TRUE, nthreads = 2, bind_to_localhost = FALSE, insecure = TRUE)
+#'
+#' # upload data to h2o cloud
+#' prostate_path <- system.file("extdata", "prostate.csv", package = "h2o")
+#' prostate <- h2o.importFile(path = prostate_path, header = TRUE)
+#'
+#' ### H2O provides 2 types of grid search for tuning the models, which are
+#' ### AutoML and Grid. Below, I demonstrate how weighted mean shapley values
+#' ### can be computed for both types.
+#'
+#' set.seed(10)
+#'
+#' #######################################################
+#' ### PREPARE AutoML Grid (takes a couple of minutes)
+#' #######################################################
+#' # run AutoML to tune various models (GBM) for 60 seconds
+#' y <- "CAPSULE"
+#' prostate[,y] <- as.factor(prostate[,y])  #convert to factor for classification
+#' aml <- h2o.automl(y = y, training_frame = prostate, max_runtime_secs = 120,
+#'                  include_algos=c("GBM"),
+#'
+#'                  # this setting ensures the models are comparable for building a meta learner
+#'                  seed = 2023, nfolds = 10,
+#'                  keep_cross_validation_predictions = TRUE)
+#'
+#' ### call 'shapley' function to compute the weighted mean and weighted confidence intervals
+#' ### of SHAP values across all trained models.
+#' ### Note that the 'newdata' should be the testing dataset!
+#' result <- shapley(models = aml, newdata = prostate, plot = TRUE)
+#'
+#' #######################################################
+#' ### Significance testing of contributions of two features
+#' #######################################################
+#' # testing the WMSHAP contributions of two features
+#' hmda.test(result, features = c("GLEASON", "PSA"), n = 5000)
+#'
+#' # testing the WMSHAP contributions of two domains (groups of features, latent factors, etc.)
+#' hmda.test(result,
+#'           n = 5000,
+#'           domains = list(Demographic = c("RACE", "AGE"),
+#'                          Cancer = c("VOL", "PSA", "GLEASON")))
+#' }
+#' @export
+
+hmda.test <- function(wmshap, features = NULL, domains = NULL, n = 5000) {
+
+  if (!is.null(features) & !is.null(domains))
+    stop("either specify a vector of 'features' or a list of 'domains'")
+  else if (is.null(features) & is.null(domains))
+    stop("either specify a vector of 'features' or a list of 'domains'")
+
+  else if (!is.null(features)) {
+    shapley.feature.test(shapley = wmshap, features = features, n = n)
+  }
+  else if (!is.null(domains)) {
+    shapley.domain.test(shapley = wmshap, domains, n = n)
+  }
+}
+
+
diff --git a/R/hmda.wmshap.R b/R/hmda.wmshap.R
index b4ee72d..28b1560 100644
--- a/R/hmda.wmshap.R
+++ b/R/hmda.wmshap.R
@@ -170,7 +170,7 @@ hmda.wmshap <- function(models,
                         cutoff = 0.01,
                         top_n_features = NULL,
                         n_models = 10,
-                        sample_size = nrow(newdata)
+                        sample_size = NULL
                         #normalize_to = "upperCI"
 ) {
   return(
@@ -186,7 +186,7 @@ hmda.wmshap <- function(models,
             cutoff = cutoff,
             top_n_features = top_n_features,
             n_models = n_models,
-            sample_size = nrow(newdata)
+            sample_size = sample_size
             #normalize_to = "upperCI"
             )
   )
diff --git a/man/figures/.DS_Store b/man/figures/.DS_Store
index cb9523b..e58dca6 100644
Binary files a/man/figures/.DS_Store and b/man/figures/.DS_Store differ
diff --git a/man/hmda.best.models.Rd b/man/hmda.best.models.Rd
index 3f88b8b..408ece1 100644
--- a/man/hmda.best.models.Rd
+++ b/man/hmda.best.models.Rd
@@ -4,7 +4,14 @@
 \alias{hmda.best.models}
 \title{Select Best Models Across All Models in HMDA Grid}
 \usage{
-hmda.best.models(df, n_models = 1)
+hmda.best.models(
+  df,
+  n_models = NULL,
+  metrics = c("logloss", "mae", "mse", "rmse", "rmsle", "mean_per_class_error", "auc",
+    "aucpr", "r2", "accuracy", "f1", "mcc", "f2"),
+  distance_percentage = NULL,
+  hyperparam = FALSE
+)
 }
 \arguments{
 \item{df}{A data frame of class \code{"hmda.grid.analysis"} containing
diff --git a/man/hmda.domain.Rd b/man/hmda.domain.Rd
index a201f9e..0cc3c60 100644
--- a/man/hmda.domain.Rd
+++ b/man/hmda.domain.Rd
@@ -5,16 +5,17 @@
 \title{compute and plot weighted mean SHAP contributions at group level (factors or domains)}
 \usage{
 hmda.domain(
-  shapley,
+  wmshap,
   domains,
   plot = "bar",
   legendstyle = "continuous",
   scale_colour_gradient = NULL,
-  print = FALSE
+  print = FALSE,
+  xlab = "Factors"
 )
 }
 \arguments{
-\item{shapley}{object of class 'shapley', as returned by the 'shapley' function}
+\item{wmshap}{object of class 'shapley', as returned by the 'shapley' function}
 
 \item{domains}{character list, specifying the domains for grouping the features'
 contributions. Domains are clusters of features' names, that
@@ -111,7 +112,7 @@ This function applies different criteria to visualize SHAP contributions
                  Group2 = c("x25", "x23", "x6", "x27"),
                  Group3 = c("x28", "x26"))
 
-  hmda.domain(shapley = wmshap,
+  hmda.domain(wmshap = wmshap,
               plot = "bar",
               domains = domains,
               print = TRUE)
diff --git a/man/hmda.grid.Rd b/man/hmda.grid.Rd
index c1c9171..0077ef2 100644
--- a/man/hmda.grid.Rd
+++ b/man/hmda.grid.Rd
@@ -122,7 +122,7 @@ The function executes the following steps:
                           nfolds = 10,
                           ntrees = 100,
                           seed = 1,
-                          hyper_params = gbm_params1)
+                          hyper_params = params)
 
   # Assess the performances of the models
   grid_performance <- hmda.grid.analysis(hmda_grid1)
diff --git a/man/hmda.grid.analysis.Rd b/man/hmda.grid.analysis.Rd
index 8be7830..9089385 100644
--- a/man/hmda.grid.analysis.Rd
+++ b/man/hmda.grid.analysis.Rd
@@ -7,7 +7,7 @@
 hmda.grid.analysis(
   grid,
   performance_metrics = c("logloss", "mse", "rmse", "rmsle", "auc", "aucpr",
-    "mean_per_class_error", "r2"),
+    "mean_per_class_error", "r2", "f1", "f2", "mcc", "kappa"),
   sort_by = "logloss"
 )
 }
diff --git a/man/hmda.plot.Rd b/man/hmda.plot.Rd
new file mode 100644
index 0000000..81f36a9
--- /dev/null
+++ b/man/hmda.plot.Rd
@@ -0,0 +1,109 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/hmda.plot.R
+\name{hmda.plot}
+\alias{hmda.plot}
+\title{Plot WMSHAP contributions}
+\usage{
+hmda.plot(
+  wmshap,
+  plot = "bar",
+  method = "mean",
+  cutoff = 0.01,
+  top_n_features = NULL,
+  features = NULL,
+  legendstyle = "continuous",
+  scale_colour_gradient = NULL,
+  labels = NULL
+)
+}
+\arguments{
+\item{plot}{character, specifying the type of the plot, which can be either
+'bar', 'waffle', or 'shap'. The default is 'bar'.}
+
+\item{method}{Character. The column name in \code{summaryShaps} used
+for feature selection. Default is \code{"mean"}, which
+selects important features which have weighted mean shap
+ratio (WMSHAP) higher than the specified cutoff. Other
+alternative is "lowerCI", which selects features which
+their lower bound of confidence interval is higher than
+the cutoff.}
+
+\item{cutoff}{numeric, specifying the cutoff for the method used for selecting
+the top features.}
+
+\item{top_n_features}{Integer. If specified, the top n features with the
+highest weighted SHAP values will be selected, overrullung
+the 'cutoff' and 'method' arguments.}
+
+\item{features}{character vector, specifying the feature to be plotted.}
+
+\item{legendstyle}{character, specifying the style of the plot legend, which
+can be either 'continuous' (default) or 'discrete'. the
+continuous legend is only applicable to 'shap' plots and
+other plots only use 'discrete' legend.}
+
+\item{scale_colour_gradient}{character vector for specifying the color gradients
+for the plot.}
+
+\item{labels}{provide feature labels for items in the dataset. If specified,
+feature descriptions will be shown in the plot instead of the
+feature names as included in the dataset. To specify the labels,
+use the \code{c} function and list the labes, e.g.,
+\code{c(feature1 = label2, feature2 = label2, ...)}.}
+
+\item{shapley}{object of class 'shapley', as returned by the 'shapley' function}
+}
+\value{
+ggplot object
+}
+\description{
+This function applies different criteria to visualize WMSHAP contributions
+}
+\examples{
+
+\dontrun{
+# load the required libraries for building the base-learners and the ensemble models
+library(h2o)            #shapley supports h2o models
+library(shapley)
+
+# initiate the h2o server
+h2o.init(ignore_config = TRUE, nthreads = 2, bind_to_localhost = FALSE, insecure = TRUE)
+
+# upload data to h2o cloud
+prostate_path <- system.file("extdata", "prostate.csv", package = "h2o")
+prostate <- h2o.importFile(path = prostate_path, header = TRUE)
+
+### H2O provides 2 types of grid search for tuning the models, which are
+### AutoML and Grid. Below, I demonstrate how weighted mean shapley values
+### can be computed for both types.
+
+set.seed(10)
+
+#######################################################
+### PREPARE AutoML Grid (takes a couple of minutes)
+#######################################################
+# run AutoML to tune various models (GBM) for 60 seconds
+y <- "CAPSULE"
+prostate[,y] <- as.factor(prostate[,y])  #convert to factor for classification
+aml <- h2o.automl(y = y, training_frame = prostate, max_runtime_secs = 120,
+                 include_algos=c("GBM"),
+
+                 # this setting ensures the models are comparable for building a meta learner
+                 seed = 2023, nfolds = 10,
+                 keep_cross_validation_predictions = TRUE)
+
+### call 'shapley' function to compute the weighted mean and weighted confidence intervals
+### of SHAP values across all trained models.
+### Note that the 'newdata' should be the testing dataset!
+result <- shapley(models = aml, newdata = prostate, plot = TRUE)
+
+#######################################################
+### PLOT THE WEIGHTED MEAN SHAP VALUES
+#######################################################
+
+shapley.plot(result, plot = "bar")
+}
+}
+\author{
+E. F. Haghish
+}
diff --git a/man/hmda.test.Rd b/man/hmda.test.Rd
new file mode 100644
index 0000000..0acf4f8
--- /dev/null
+++ b/man/hmda.test.Rd
@@ -0,0 +1,81 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/hmda.test.R
+\name{hmda.test}
+\alias{hmda.test}
+\title{Normalize a vector based on specified minimum and maximum values}
+\usage{
+hmda.test(wmshap, features = NULL, domains = NULL, n = 5000)
+}
+\arguments{
+\item{wmshap}{object of class 'shapley', as returned by the 'shapley' function}
+
+\item{features}{character, name of two features to be compared with permutation test}
+
+\item{domains}{list of two character vectors, each including a number of features.}
+
+\item{n}{integer, number of permutations}
+}
+\value{
+normalized numeric vector
+}
+\description{
+This function normalizes a vector based on specified minimum
+             and maximum values. If the minimum and maximum values are not
+             specified, the function will use the minimum and maximum values
+             of the vector.
+}
+\examples{
+
+\dontrun{
+# load the required libraries for building the base-learners and the ensemble models
+library(h2o)            #shapley supports h2o models
+library(autoEnsemble)   #autoEnsemble models, particularly useful under severe class imbalance
+library(shapley)
+
+# initiate the h2o server
+h2o.init(ignore_config = TRUE, nthreads = 2, bind_to_localhost = FALSE, insecure = TRUE)
+
+# upload data to h2o cloud
+prostate_path <- system.file("extdata", "prostate.csv", package = "h2o")
+prostate <- h2o.importFile(path = prostate_path, header = TRUE)
+
+### H2O provides 2 types of grid search for tuning the models, which are
+### AutoML and Grid. Below, I demonstrate how weighted mean shapley values
+### can be computed for both types.
+
+set.seed(10)
+
+#######################################################
+### PREPARE AutoML Grid (takes a couple of minutes)
+#######################################################
+# run AutoML to tune various models (GBM) for 60 seconds
+y <- "CAPSULE"
+prostate[,y] <- as.factor(prostate[,y])  #convert to factor for classification
+aml <- h2o.automl(y = y, training_frame = prostate, max_runtime_secs = 120,
+                 include_algos=c("GBM"),
+
+                 # this setting ensures the models are comparable for building a meta learner
+                 seed = 2023, nfolds = 10,
+                 keep_cross_validation_predictions = TRUE)
+
+### call 'shapley' function to compute the weighted mean and weighted confidence intervals
+### of SHAP values across all trained models.
+### Note that the 'newdata' should be the testing dataset!
+result <- shapley(models = aml, newdata = prostate, plot = TRUE)
+
+#######################################################
+### Significance testing of contributions of two features
+#######################################################
+# testing the WMSHAP contributions of two features
+hmda.test(result, features = c("GLEASON", "PSA"), n = 5000)
+
+# testing the WMSHAP contributions of two domains (groups of features, latent factors, etc.)
+hmda.test(result,
+          n = 5000,
+          domains = list(Demographic = c("RACE", "AGE"),
+                         Cancer = c("VOL", "PSA", "GLEASON")))
+}
+}
+\author{
+E. F. Haghish
+}
diff --git a/man/hmda.wmshap.Rd b/man/hmda.wmshap.Rd
index 561812d..be77232 100644
--- a/man/hmda.wmshap.Rd
+++ b/man/hmda.wmshap.Rd
@@ -16,7 +16,7 @@ hmda.wmshap(
   cutoff = 0.01,
   top_n_features = NULL,
   n_models = 10,
-  sample_size = nrow(newdata)
+  sample_size = NULL
 )
 }
 \arguments{