# hypothesis testing examples. library(MASS) # contains function vcov for extracting cov(betahat) from glm fit #### first for Meron data, where phi is not estimated. m <- read.table(file="meron.200.glim.txt", header=TRUE ) m[1:5,] attach(m) # define appropriate indicator variables goats = 1 * (graze.type == 2) # so will be a numeric variable cattle.low = (graze.type == 1)*(graze.intensity ==1) cattle.med.high = (graze.type == 1)*(graze.intensity >= 2) #### fit logistic regression to full model. logit.full = glm(tree.92 ~ alt + stream.dist + road.dist + water.dist + slope + ns.aspect + ew.aspect + ns.aspect*slope + ew.aspect*slope + cattle.low + cattle.med.high + goats, family = binomial(link=logit)) summary(logit.full) dev.full = logit.full$dev # extract deviance and df for full model df.full = logit.full$df.residual c(dev.full, df.full) [1] 122.6028 187.0000 #### now fit model without slope and aspect variables logit.sub = glm(tree.92 ~ alt + stream.dist + road.dist + water.dist + cattle.low + cattle.med.high + goats, family = binomial(link=logit)) summary(logit.sub) dev.sub = logit.sub$dev # extract deviance and df for submodel df.sub = logit.sub$df.residual c(dev.sub, df.sub) [1] 154.167 192.000 #### compute LR stat and p-value LR.chisq = (dev.sub - dev.full) p.value.LR = 1 - pchisq(LR.chisq, df.sub - df.full) c(LR.chisq, p.value.LR) [1] 3.156425e+01 7.246223e-06 #### now do the Wald test var.names = c("slope", "ns.aspect", "ew.aspect", "slope:ns.aspect", "slope:ew.aspect") beta.test = logit.full$coef[var.names] # estimates of the coeffs we're testing beta.test cov.beta.full = vcov(logit.full) # cov(betahat) from full model cov.beta.full cov.beta.test = cov.beta.full[var.names, var.names] # cov matrix for the coefs we're testing cov.beta.test Wald.chisq = t(beta.test) %*% solve(cov.beta.test) %*% beta.test p.value.Wald = 1 - pchisq(Wald.chisq, df.sub - df.full) c(Wald.chisq, p.value.Wald) [1] 18.49775592 0.00238312 # now for mismatch data, where phi is estimated m <- read.table(file="mismatch.data.txt", header=FALSE ) m[1:5,] surv_time = m[,1] reject = m[,2] mismatch = m[,3] age = m[,4] wait_time = m[,5] cal_time = m[,6] # gamma regression using inverse link # obtain a starting value from a linear regression of 1/surv_time on X and truncating linear.2 = lm( 1/surv_time ~ reject + mismatch + age + wait_time + cal_time) summary(linear.2) beta_0 = linear.2$coef beta_pos = beta_0 * (beta_0 > 0) # gamma regression for full model gamma.full = glm( surv_time ~ reject + mismatch + age + wait_time + cal_time, family = Gamma(link="inverse"), start=beta_pos, control=glm.control(trace = TRUE)) summary(gamma.full) # estimate phi using the Pearson residuals pearson.resids = (surv_time - gamma.full$fitted)/gamma.full$fitted phi.hat.full = sum(pearson.resids^2) / gamma.full$df.residual phi.hat.full [1] 1.908089 dev.full = gamma.full$dev # extract deviance and df for full model df.full = gamma.full$df.residual c(dev.full, df.full) # now fit model without wait_time and cal_time gamma.sub = glm( surv_time ~ reject + mismatch + age , family = Gamma(link="inverse"), start=beta_pos[1:4], control=glm.control(trace = TRUE)) summary(gamma.sub) dev.sub = gamma.sub$dev # extract deviance and df for submodel df.sub = gamma.sub$df.residual c(dev.sub, df.sub) [1] 64.7912 35.0000 #### compute LR stat and p-value using F dist LR.stat = ((dev.sub - dev.full)/phi.hat.full) / (df.sub - df.full) p.value.LR = 1 - pf(LR.stat, df.sub - df.full, df.full) c(LR.stat, p.value.LR) [1] 1.0974882 0.3455802