# hypothesis testing examples.   
library(MASS)		# contains function vcov for extracting cov(betahat) from glm fit

#### first for Meron data, where phi is not estimated.
m <- read.table(file="meron.200.glim.txt", header=TRUE  ) 
m[1:5,]
attach(m)	

# define appropriate indicator variables
goats = 1 * (graze.type == 2) # so will be a numeric variable
cattle.low = (graze.type == 1)*(graze.intensity ==1)
cattle.med.high = (graze.type == 1)*(graze.intensity >= 2)

#### fit logistic regression to full model. 
logit.full = glm(tree.92 ~  alt + stream.dist + road.dist 
			+ water.dist +  slope + ns.aspect + ew.aspect 
			+ ns.aspect*slope + ew.aspect*slope 
			+ cattle.low + cattle.med.high 
			+ goats, 
		family = binomial(link=logit)) 		
summary(logit.full)

dev.full = logit.full$dev	# extract deviance and df for full model
df.full = logit.full$df.residual
c(dev.full, df.full)
[1] 122.6028 187.0000

#### now fit model without slope and aspect variables
logit.sub = glm(tree.92 ~  alt + stream.dist + road.dist 
			+ water.dist      
			+ cattle.low + cattle.med.high 
			+ goats, 
		family = binomial(link=logit)) 		
summary(logit.sub)

dev.sub = logit.sub$dev	# extract deviance and df for submodel
df.sub = logit.sub$df.residual
c(dev.sub, df.sub)
[1] 154.167 192.000

#### compute LR stat and p-value
LR.chisq = (dev.sub - dev.full)
p.value.LR = 1 - pchisq(LR.chisq, df.sub - df.full)
c(LR.chisq,   p.value.LR)
[1] 3.156425e+01 7.246223e-06

#### now do the Wald test
var.names = c("slope", "ns.aspect", "ew.aspect", "slope:ns.aspect", "slope:ew.aspect")
beta.test = logit.full$coef[var.names]	# estimates of the coeffs we're testing
beta.test

cov.beta.full = vcov(logit.full)	# cov(betahat) from full model
cov.beta.full
cov.beta.test = cov.beta.full[var.names, var.names] # cov matrix for the coefs we're testing
cov.beta.test 
 
Wald.chisq = t(beta.test) %*% solve(cov.beta.test) %*% beta.test
p.value.Wald = 1 - pchisq(Wald.chisq, df.sub - df.full)
c(Wald.chisq,   p.value.Wald)
[1] 18.49775592  0.00238312
 
 
# now for mismatch data, where phi is estimated
m <- read.table(file="mismatch.data.txt", header=FALSE  ) 
m[1:5,]

surv_time	= m[,1]
reject	= m[,2]
mismatch	= m[,3]
age		= m[,4]
wait_time	= m[,5]
cal_time	= m[,6]

# gamma regression using inverse link

# obtain a starting value from a linear regression of 1/surv_time on X and truncating
 linear.2 = lm( 1/surv_time ~ 
	reject + mismatch + age + wait_time + cal_time) 
summary(linear.2)
beta_0 = linear.2$coef
beta_pos = beta_0 * (beta_0 > 0)

# gamma regression for full model
gamma.full = glm( surv_time ~ 
	reject + mismatch + age + wait_time + cal_time, 
		family = Gamma(link="inverse"),
		start=beta_pos,
		control=glm.control(trace = TRUE))
summary(gamma.full) 

# estimate phi using the Pearson residuals
pearson.resids = (surv_time - gamma.full$fitted)/gamma.full$fitted
phi.hat.full = sum(pearson.resids^2) / gamma.full$df.residual
phi.hat.full
[1] 1.908089

dev.full = gamma.full$dev	# extract deviance and df for full model
df.full = gamma.full$df.residual
c(dev.full, df.full)

# now fit model without wait_time and cal_time
gamma.sub = glm( surv_time ~ 
	reject + mismatch + age , 
		family = Gamma(link="inverse"),
		start=beta_pos[1:4],
		control=glm.control(trace = TRUE))
summary(gamma.sub) 

dev.sub = gamma.sub$dev	# extract deviance and df for submodel
df.sub = gamma.sub$df.residual
c(dev.sub, df.sub)
[1] 64.7912 35.0000

 
#### compute LR stat and p-value using F dist
LR.stat = ((dev.sub - dev.full)/phi.hat.full) / (df.sub - df.full)
p.value.LR = 1 - pf(LR.stat, df.sub - df.full, df.full)

c(LR.stat, p.value.LR)
[1] 1.0974882 0.3455802