Skip to contents

Fit and validate Generalized Linear Models

Usage

fit_abund_glm(
  data,
  response,
  predictors,
  predictors_f = NULL,
  fit_formula = NULL,
  sigma_formula = ~1,
  nu_formula = ~1,
  tau_formula = ~1,
  partition,
  predict_part = FALSE,
  distribution = NULL,
  poly = 0,
  inter_order = 0,
  control_gamlss = gamlss::gamlss.control(trace = FALSE),
  verbose = TRUE
)

Arguments

data

tibble or data.frame. Database with response, predictors, and partition values

response

character. Column name with species abundance.

predictors

character. Vector with the column names of quantitative predictor variables (i.e. continuous variables). Usage predictors = c("temp", "precipt", "sand")

predictors_f

character. Vector with the column names of qualitative predictor variables (i.e. ordinal or nominal variables). Usage predictors_f = c("landform")

fit_formula

formula. A formula object with response and predictor variables (e.g. formula(abund ~ temp + precipt + sand + landform)). Note that the variables used here must be consistent with those used in response, predictors, and predictors_f arguments. Default NULL

sigma_formula

formula. formula for fitting a model to the sigma parameter. Usage sigma_formula = ~ precipt + temp

nu_formula

formula. formula for fitting a model to the nu parameter. Usage nu_formula = ~ precipt + temp

tau_formula

formula. formula for fitting a model to the tau parameter. Usage tau_formula = ~ precipt + temp

partition

character. Column name with training and validation partition groups.

predict_part

logical. Save predicted abundance for testing data. Default is FALSE.

distribution

character. A string specifying the distribution to be used. See gamlss.family documentation for details. Usage distribution = gamlss.dist::NO() or, as in the examples below, distribution = "ZAGA". Default NULL

poly

integer. Polynomial order. If set to a value >= 2, the model will use polynomials of that order for the continuous variables (i.e. those used in the predictors argument). Default is 0.

inter_order

integer >= 0. The interaction order between explanatory variables. Default is 0.

control_gamlss

function. Control parameters of the outer iterations algorithm in gamlss. See gamlss.control documentation for details. Default gamlss::gamlss.control(trace = FALSE)

verbose

logical. If FALSE, disables all console messages. Default TRUE

Value

A list object with:

  • model: A "gamlss" class object from gamlss package. This object can be used for predicting.

  • predictors: A tibble with quantitative (c column names) and qualitative (f column names) variables used for modeling.

  • performance: Averaged performance metrics (see adm_eval).

  • performance_part: Performance metrics for each replica and partition.

  • predicted_part: Observed and predicted abundance for each test partition.

Examples

if (FALSE) {
# Attach the packages used below. library() stops with an error when a
# package is missing, whereas require() would only return FALSE and let
# the script fail later with a less obvious message.
library(terra)
library(dplyr)
library(gamlss)

# Database with species abundance and x and y coordinates
data("sppabund")

# Extract data for a single species, dropping the partition columns
# that are not used in these examples
some_sp <- sppabund %>%
  dplyr::filter(species == "Species one") %>%
  dplyr::select(-.part2, -.part3)

# Explore the response variable
some_sp$ind_ha %>% range()
some_sp$ind_ha %>% hist()

# Here we balance number of absences
some_sp <-
  balance_dataset(some_sp, response = "ind_ha", absence_ratio = 0.2)

# Explore different family distributions
family_selector(data = some_sp, response = "ind_ha") %>% tail()

# Fit a GLM model
glm_1 <- fit_abund_glm(
  data = some_sp,
  response = "ind_ha",
  predictors = c("bio12", "elevation", "sand"),
  predictors_f = c("eco"),
  partition = ".part",
  distribution = "ZAGA",
  poly = 0,
  inter_order = 0,
  predict_part = TRUE
)

glm_1

# Using second order polynomials and first order interaction terms
glm_2 <- fit_abund_glm(
  data = some_sp,
  response = "ind_ha",
  predictors = c("bio12", "elevation", "sand"),
  predictors_f = c("eco"),
  partition = ".part",
  distribution = "ZAGA",
  poly = 2,
  inter_order = 1,
  predict_part = TRUE
)

glm_2

# Using third order polynomials and second order interaction terms
glm_3 <- fit_abund_glm(
  data = some_sp,
  response = "ind_ha",
  predictors = c("bio12", "elevation", "sand"),
  predictors_f = c("eco"),
  partition = ".part",
  distribution = "ZAGA",
  poly = 3,
  inter_order = 2,
  predict_part = TRUE
)

glm_3

# Setting formulas for different distribution parameters.
# fit_formula is a full two-sided formula; sigma_formula is written
# one-sided (no response), matching its documented usage and its
# default value of ~1.
glm_4 <- fit_abund_glm(
  data = some_sp,
  response = "ind_ha",
  predictors = c("bio12", "elevation", "sand"),
  predictors_f = c("eco"),
  partition = ".part",
  distribution = "ZAGA",
  fit_formula = ind_ha ~ bio12 + elevation + sand + eco,
  sigma_formula = ~ bio12 + elevation + sand,
  poly = 0,
  inter_order = 0,
  predict_part = TRUE
)

glm_4
}