Skip to contents

Estimate the specific module using indicator saturation

Usage

estimate_module(
  clean_data,
  dep_var_basename = "imports_of_goods_and_services",
  x_vars_basename = c("gross_capital_formation",
    "household_and_npish_final_consumption_expenditure"),
  use_logs = c("both", "y", "x"),
  trend = TRUE,
  ardl_or_ecm = "ardl",
  max.ar = 4,
  max.dl = 2,
  saturation = c("IIS", "SIS"),
  saturation.tpval = 0.01,
  max.block.size = 20,
  gets_selection = TRUE,
  selection.tpval = 0.01
)

Arguments

clean_data

An input data.frame or tibble. Must be the output of clean_data() to fit all requirements.

dep_var_basename

A character string of the name of the dependent variable as contained in clean_data() in a level form (i.e. no ln or D in front of the name).

x_vars_basename

A character vector of the name(s) of the independent variable(s) as contained in clean_data() in a level form (i.e. no ln or D in front of the name).

use_logs

To decide whether to log any variables. Must be one of "both", "y", or "x". Default is "both".

trend

Logical. To determine whether a trend should be added. Default is TRUE.

ardl_or_ecm

Either 'ardl' or 'ecm' to determine whether to estimate the model as an Autoregressive Distributed Lag model ('ardl') or as an Equilibrium Correction Model ('ecm'). Default is 'ardl'.

max.ar

Integer. The maximum number of lags to use for the AR terms (the lags of the dependent variable). The lags of the independent variables are limited separately by max.dl.

max.dl

Integer. The maximum number of lags to use for the independent variables (the distributed lags).

saturation

Carry out Indicator Saturation using the 'isat' function in the 'gets' package. Needs to be a character vector or string. Default is 'c("IIS","SIS")' to carry out Impulse Indicator Saturation and Step Indicator Saturation. Other possible values are 'NULL' to disable or 'TIS' for Trend Indicator Saturation. When disabled, estimation will be carried out using the 'arx' function from the 'gets' package.

saturation.tpval

The target p-value of the saturation methods (e.g. SIS and IIS, see the 'isat' function in the 'gets' package). Default is 0.01.

max.block.size

Integer. Maximum size of block of variables to be selected over, default = 20.

gets_selection

Logical. Whether general-to-specific selection using the 'getsm' function from the 'gets' package should be done on the final saturation model. Default is TRUE.

selection.tpval

Numeric. The target p-value of the model selection methods (i.e. general-to-specific modelling, see the 'getsm' function in the 'gets' package). Default is 0.01.

Value

A list containing all estimated models under 'isat_list', the model with the smallest BIC under 'best_model', and the arguments used for the estimation under 'args'.

Examples

sample_data <- dplyr::tibble(
  time = rep(seq.Date(
    from = as.Date("2000-01-01"),
    to = as.Date("2000-12-31"), by = 1
  ), each = 2),
  na_item = rep(c("yvar", "xvar"), 366), values = rnorm(366 * 2, mean = 100)
)
sample_data_clean <- aggregate.model:::clean_data(sample_data, max.ar = 4)
aggregate.model:::estimate_module(sample_data_clean, "yvar", "xvar")
#> $isat_list
#> # A tibble: 5 × 3
#>      ar    BIC isat_object
#>   <int>  <dbl> <list>     
#> 1     0 -2355. <isat>     
#> 2     1 -2336. <isat>     
#> 3     2 -2323. <isat>     
#> 4     3     0  <cpl [1]>  
#> 5     4     0  <cpl [1]>  
#> 
#> $best_model
#> 
#> Date: Wed Mar  6 14:29:24 2024 
#> Dependent var.: y 
#> Method: Ordinary Least Squares (OLS)
#> Variance-Covariance: Ordinary 
#> No. of observations (mean eq.): 366 
#> Sample: 1 to 366 
#> 
#> SPECIFIC mean equation:
#> 
#>              coef  std.error    t-stat   p-value    
#> mconst  4.6076775  0.0030976 1487.5014 < 2.2e-16 ***
#> iis139  0.0267351  0.0087885    3.0421 0.0025240 ** 
#> iis345  0.0292552  0.0088274    3.3141 0.0010138 ** 
#> iis346 -0.0290060  0.0088274   -3.2859 0.0011177 ** 
#> sis9   -0.0109005  0.0037938   -2.8733 0.0043062 ** 
#> sis25   0.0096551  0.0022966    4.2041 3.319e-05 ***
#> sis187 -0.0099429  0.0026217   -3.7925 0.0001752 ***
#> sis199  0.0079296  0.0026842    2.9542 0.0033433 ** 
#> sis294  0.0161413  0.0040200    4.0153 7.244e-05 ***
#> sis299 -0.0150077  0.0040639   -3.6929 0.0002564 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Diagnostics and fit:
#> 
#>                    Chi-sq df p-value
#> Ljung-Box AR(1)   0.22312  1  0.6367
#> Ljung-Box ARCH(1) 0.29054  1  0.5899
#>                            
#> SE of regression    0.00876
#> R-squared           0.18155
#> Log-lik.(n=366)  1219.55995
#> 
#> $args
#> $args$clean_data
#> # A tibble: 366 × 28
#>    index time       trend  yvar  xvar ln.yvar ln.xvar D.ln.yvar D.ln.xvar
#>    <int> <date>     <dbl> <dbl> <dbl>   <dbl>   <dbl>     <dbl>     <dbl>
#>  1     1 2000-01-01     1  99.3 101.     4.60    4.61  NA       NA       
#>  2     2 2000-01-02     2  99.5  99.8    4.60    4.60   0.00123 -0.00719 
#>  3     3 2000-01-03     3  99.2  99.4    4.60    4.60  -0.00229 -0.00437 
#>  4     4 2000-01-04     4 102.  101.     4.62    4.61   0.0240   0.0145  
#>  5     5 2000-01-05     5 100.   99.6    4.61    4.60  -0.0130  -0.0124  
#>  6     6 2000-01-06     6 101.  101.     4.61    4.62   0.00558  0.0159  
#>  7     7 2000-01-07     7 100.  101.     4.60    4.61  -0.00944 -0.00400 
#>  8     8 2000-01-08     8 101.   99.7    4.62    4.60   0.0121  -0.0106  
#>  9     9 2000-01-09     9  99.7  99.7    4.60    4.60  -0.0147  -0.000145
#> 10    10 2000-01-10    10  99.2  99.9    4.60    4.60  -0.00492  0.00192 
#> # ℹ 356 more rows
#> # ℹ 19 more variables: L1.D.ln.yvar <dbl>, L1.D.ln.xvar <dbl>,
#> #   L1.ln.yvar <dbl>, L1.ln.xvar <dbl>, L2.D.ln.yvar <dbl>, L2.D.ln.xvar <dbl>,
#> #   L2.ln.yvar <dbl>, L2.ln.xvar <dbl>, L3.D.ln.yvar <dbl>, L3.D.ln.xvar <dbl>,
#> #   L3.ln.yvar <dbl>, L3.ln.xvar <dbl>, L4.D.ln.yvar <dbl>, L4.D.ln.xvar <dbl>,
#> #   L4.ln.yvar <dbl>, L4.ln.xvar <dbl>, q_2 <int>, q_3 <int>, q_4 <int>
#> 
#> $args$dep_var_basename
#> [1] "yvar"
#> 
#> $args$x_vars_basename
#> [1] "xvar"
#> 
#> $args$use_logs
#> [1] "both"
#> 
#> $args$ardl_or_ecm
#> [1] "ardl"
#> 
#> $args$max.ar
#> [1] 4
#> 
#> $args$max.dl
#> [1] 2
#> 
#>