Sample data
dat <- structure(list(location.id = structure(c(198L, 243L, 147L, 114L,152L, 117L, 151L, 129L, 148L, 154L, 233L, 12L,
226L, 129L, 146L,126L, 247L, 147L, 80L, 133L, 162L, 147L, 152L, 126L, 36L, 154L,147L, 166L, 132L, 241L,
146L, 152L, 141L, 227L, 41L, 187L, 131L,140L, 232L, 155L, 130L, 166L, 222L, 246L, 222L, 129L, 262L, 244L,
188L, 210L), .Label = c("11001", "11003", "11005", "11006","11007","11008", "14001", "14002", "15002",
"15010", "15012", "15014","15015", "15017", "15018", "15019","15021", "15022", "17001", "17002","17003",
"17004", "17005", "17006", "17007", "17008","21008", "21009","21011", "21012", "21013", "21014","21018",
"21019", "21020", "21021", "22001", "22002", "22003", "22005","22007", "22008", "22009", "22010","22012",
"29001", "29002","29003", "29007", "29023", "31001", "31002","31003", "31006","31011","31017","31018",
"31019", "31020", "31021", "31022","31023", "31024","31025","31026", "31027", "31029","31042","31043",
"31044", "31047", "31048", "31049", "31050", "31051","31054","31057", "31058", "31060","35001","35002",
"35003","35004", "35005", "35006", "35007", "35008","35009","35010","35011", "35012","35013","35014",
"35015", "35016", "35017", "35018", "35019", "35020","35021","35022", "35023", "35024","35025","35026", "35027", "35028", "35029", "35030", "35031",
"35032", "35034", "35035", "35036", "35037", "35038","35039","35040", "35041", "35042","35043","35044", "35046", "35048",
"35050", "41001", "41002", "41003", "41004", "41005","41006","41007", "41008", "41009","41010","41011", "41012", "41013",
"41014", "41015", "41016", "41017", "41018", "41019","41020","41021", "41022", "41023","41024","41025", "41026", "41027",
"41028", "41029", "41030", "41031", "41032", "41033","41034","41035", "41036", "41037","41038","41039", "42001", "42002",
"42003", "42004", "42005", "42006", "42007", "42009","42010","42011", "42014", "42019", "42020","43001", "43002", "43003",
"43004", "43005", "43006", "43007", "43008", "43009","43010","43011", "43012", "43013", "43014","43015", "43016", "43017",
"43018", "43019", "43020", "43021", "43022", "43023","43024","43025", "43026", "43027", "43028","43029", "43030", "43031",
"43032", "43033", "43034", "43035", "50001", "50002", "50003","50004", "50005", "50006", "50007","50008", "50009", "50010",
"50011", "51001", "51002", "51003", "51004", "51005", "51006","51007", "51008", "51009", "51010","51011", "51012", "51013",
"51014", "51015", "51016", "51017", "51018", "51019", "51020","51021", "51022", "52001", "52002", "52003", "52004", "52005",
"52006", "52007", "52008", "52009", "52010", "52011", "52012","52013", "52014", "52015", "52016", "52017", "52018", "53001"
), class = "factor"), year = structure(c(4L, 3L, 26L, 27L, 14L,19L, 13L, 19L, 9L, 21L, 4L, 20L, 27L, 17L, 25L, 23L, 3L, 13L,
10L, 22L, 27L, 27L, 21L, 15L, 26L, 27L, 15L, 18L, 21L, 28L, 22L, 17L, 17L, 26L, 8L, 26L, 13L, 13L, 2L, 19L, 17L, 12L, 5L, 17L,
27L, 4L, 5L, 16L, 28L, 26L), .Label = c("1987", "1988", "1989", "1990", "1991", "1992", "1993", "1994", "1995", "1996", "1997",
"1998", "1999", "2000", "2001", "2002", "2003", "2004", "2005",
"2006", "2007", "2008", "2009", "2010", "2011", "2012", "2013",
"2014"), class = "factor"), yield = c(731.07, 2264.99,3382.08, 2520.42, 3093.13, 3030.99,
2796.10, 2098.87, 2797.54, 3037.78,1980, 2720, 2966.98, 2799.68, 3290.9, 3199.72, 702.70, 2085.21,2396.69,
1344, 2975.39, 3475.36, 2918.5, 3470,2864.99, 2849.73, 3398.87, 359.45,
2579.98, 2849.49, 2323.80, 2345.07,2798.13, 2931.56, 1199.93, 3170.05,
1599.72, 1735.04, 1600.24, 2299.06,2472.64, 1682.35, 1599.93, 2675.57,
2595.38, 1989.35, 2182.74, 2883.3,3523.37, 2820.11), dhs.rep = c(0.11,
12.94, 6.1, 7.41, 0, 0.03, 0, 0, 0, 4.32, 5.61, 0, 0, 2.01, 0, 1.9, 2.85,
0.14, 0.27, 4.16, 0, 1.89, 2.75, 0.98, 0, 0, 0, 0.5, 6.48, 2.95, 0, 0,
3, 3.76, 0.15, 0, 0, 0, 0, 0, 2.87,1.83, 0, 6.02, 11.17, 1.15, 0, 12.33, 0, 3.55), nhs.rip = c(0,
37.47, 9.47, 21.6, 3.18, 16.08, 0, 0, 0, 0, 3.15, 12.01, 6.16, 0, 0, 34.92, 0.1, 2.58,
11.01, 4.91, 0, 19.07, 0, 9.54, 70.28, 2.2, 0.52, 2.16, 0.7,108.32, 0, 0, 1.08, 12.17, 13.73, 0, 4.7, 0.25, 17.15, 0, 0,
2.21, 0.78, 26.86, 11.62, 0.99, 0, 72.74, 0.42, 2.01), solar.veg = c(21.57, 20.22, 21.7, 23.2, 22.95, 22.14,
21.85, 21.95, 22.44, 22.07, 21.138, 17.37, 16.6, 22.88, 19.82, 22.83, 17.87, 22.11,
18.8, 19.97, 17.28, 24.81, 21.57, 21.04, 15.46, 24.49,19.55, 21.12, 19.41, 19.30,
18.34, 21.5, 20.94, 17.7, 16.95, 22.13, 21.72, 21.12, 17.235, 21.97, 23.154, 24.24, 21.42,
19.17, 23.3216, 19.11, 20.21, 20.66, 24.74, 24.95), elevation = c(232.53,487.04, 269.5, 357.94, 636.83,
400.53, 489.75, 585.67, 662.87, 641.73, 353.21, 337.01, 314.9, 421.94, 887.89, 346.45, 321.6,
269.5, 388.07, 386.98, 906.96, 503.98, 575.18, 421.59, 450.33, 597.56, 312.68, 401.59, 383.07, 124, 887.89, 687.08, 519.76,
527.22, 409.98, 601.31, 528.53, 420.48, 186.81, 858.06, 548.78,532.25, 362.1, 257.96, 362.1, 487.63, 805.64, 453.03, 410.2,
34.96)), row.names = c(6645L, 8041L, 4028L, 1999L, 4974L, 2063L,4932L, 2602L, 4409L, 5200L, 7747L, 78L, 7384L, 2734L, 3944L,
2255L, 8262L, 4021L, 1368L, 3334L, 5596L, 4286L, 5083L, 2258L, 542L, 5328L, 4153L, 5721L, 3244L, 7965L, 3948L, 4980L, 3778L,
7408L, 691L, 6367L, 3121L, 3630L, 7712L, 5370L, 2752L, 5765L, 7169L, 8238L, 7171L, 2621L, 8705L, 8169L, 6425L, 6847L), class = "data.frame")
The data has a response variable called 'yield' and predictors which are certain climate variables, elevation, location.id and year. I am trying to run a the implementation of MARS from earth package following this vignette:
http://www.milbo.org/doc/earth-notes.pdf
In this implementation, I do not want to run the interactions between elevation
, location.id
and year
with any of the other variables. In this vignette, there is an example that if I want to exclude interaction between two variables, I can do this as follows;
PREDICTORS <- c("elevation","location.id","year") # these variables are not allowed to interact with any variables in PARENTS
PARENTS <- names(dat)[-3] # the third column is my response variables so I am removing it
no.int <- function(degree, pred, parents, namesx){
if (degree > 1) {
predictor <- namesx[pred]
parents <- namesx[parents != 0]
if((any(predictor %in% PREDICTORS) && any(parents %in% PARENTS)) ||
(any(predictor %in% PARENTS) && any(parents %in% PREDICTORS))) {
return(FALSE)
}
}
TRUE
}
library(earth)
fit.yld <- earth(x = dat[,PARENTS], y = dat[,3], keepxy = T, degree = 2, allowed = no.int,pmethod = "none")
summary(fit.yld)
I want to test 2-way interactions that's why I set degree = 2. However, I want to exclude elevation, location and year to interact with each other as well as with any of the climate variables in the model. In the vignette on page 21, bottom para it says that using the allowed argument: It prevents the specified predictor from being used in interaction terms
If I run the above function, I still see some terms where location and years are interacting. However, the function should be able to stop any sort of interaction between these two variables.
I understand this is quite a specific topic regarding a package so I would really appreciate some help.
Thanks