Introduction

Finally we will be running a model that combines data from marked and unmarked individuals AND estimates the overall number of individuals \(N\). Similar to the regular capture-recapture model, we will have to assume that there are \(\tilde N\) potential individuals, of which only a fraction \(\pi\) of them actually exist. Thus, the status of each individual is given by a binary variable \(w_i (i=1,...,\tilde N)\). This status variable is equal to 1 if the animal exists and 0 otherwise. We assume that:

\[w_i \sim Bernoulli (\pi) \]

Notice that in this model, \(\pi\) is the proportion of potential individuals that truly exist. Therefore, our estimate of the true population size \(\hat N\) is given by the number of potential individuals \(\tilde N\) times \(\pi\):

\[\hat N = \tilde N \times \pi\]

Furthermore, now we can modify our likelihood for the unmarked individuals in the following way:

\[n_j =\sum_{i=1}^\tilde N \sum_{k=1}^k z_{ijk} \sim Poisson(K \times \sum_{i=1}^\tilde N w_i\lambda_{ij}) \]

Simulating data for model 4

The code to simulate data for this model is identical to that used for model 3. The only difference will be within the JAGS model due to the fact that model 4 estimates the number of individuals whereas model 3 assumed that this was known.

rm(list=ls())
library('ggplot2')
set.seed(1)

sig2=5
lambda0=10
xmax=ymax=100
nind=20
nind.marked=5
nind.unmark=nind-nind.marked
coord.acenter=data.frame(x=runif(nind,min=25,max=75),
                         y=runif(nind,min=25,max=75))
plot(y~x,data=coord.acenter,xlim=c(0,xmax),ylim=c(0,ymax))

ntraps=25 #number of traps
tmp=seq(from=1,to=xmax,length.out=5)
coord.traps=expand.grid(x=tmp,
                        y=tmp)
points(y~x,data=coord.traps,col='red',pch=19,cex=0.5)

ntimes=10 #encounter times

#calculate distances
dist1=matrix(NA,ntraps,nind)
for (j in 1:ntraps){
  for (i in 1:nind){
    x2=(coord.traps$x[j]-coord.acenter$x[i])^2
    y2=(coord.traps$y[j]-coord.acenter$y[i])^2
    dist1[j,i]=sqrt(x2+y2)
  }
}
# dist1a=proxy::dist(x=coord.traps,y=coord.acenter)
# unique(dist1-dist1a)

#generate data
zarray=array(NA,dim=c(nind,ntraps,ntimes))
for (t1 in 1:ntimes){
  z=matrix(NA,ntraps,nind)
  for (i in 1:nind){
    g=exp(-dist1[,i]/sig2)    
    z[,i]=rpois(ntraps,lambda0*g)
    zarray[i,,t1]=z[,i]
  }
}

#separate marked data
ind.marked=1:nind.marked
ind.unmarked=(nind.marked+1):nind
zarray.marked=zarray[ind.marked,,]
zarray.unmarked=zarray[ind.unmarked,,]

#calculate summary for marked and unmarked
nvec.unmarked=apply(zarray.unmarked,2,sum)
zmat.marked=apply(zarray.marked,c(1,2),sum)

Estimating parameters

To estimate the model parameters, we rely on the code below

library('jagsUI')

# MCMC settings 
ni <- 10000 #number of iterations
nt <- 5    #interval to thin 
nb <- 5000  #number of iterations to discard as burn-in
nc <- 3    #number of chains

#parameters to monitor
params=c('lambda0','sig2','psi','coord.acenter','w')

#number of potential individuals
ntilde=100

#data for jags
dat1=list(zmat=zmat.marked,coord.trap=coord.traps,
          nind.marked=nind.marked,ntimes=ntimes,ntraps=ntraps,
          nvec=nvec.unmarked,ntilde=ntilde)

#run JAGS
inits=function(){
  list(w=rep(1,ntilde))
}
setwd('U:\\uf\\courses\\bayesian course\\group activities\\17 example scr\\mod4')
model4=jags(model.file="marked unmarked unknown N JAGS.R",inits=inits,
            parameters.to.save=params,data=dat1,
            n.chains=nc,n.burnin=nb,n.iter=ni,n.thin=nt,DIC=TRUE)
summary(model4) #the Rhat results suggest that the algorithm has converged
#look at model parameters
plot(model4$sims.list$lambda0,type='l',ylab='lambda0',xlab='Iterations')
abline(h=10,col='red')

plot(model4$sims.list$sig2,type='l',ylab='sigma2',xlab='Iterations')
abline(h=5,col='red')

As expected, we can estimate \(\lambda_0\) and \(\sigma^2\). However, we can also estimate well population size N:

plot(model4$sims.list$psi*ntilde,type='l',ylab='Nhat',xlab='Iterations')
abline(h=20,col='red')

Finally, given that the name of this model is spatial capture-recapture, let’s see how well we can estimate the spatial distribution of these individuals. To calculate this, I will use the following steps for each posterior sample:

  1. Get the estimated coordinates of the activity centers, just for the individuals estimated to truly exist

  2. Use a 2D kernel estimate to calculate the 2D density.

  3. Store these results.

Here is the code that I use to make this comparison.

library('MASS')
library('ggplot2')

coord.estim=model4$sims.list$coord.acenter
w.estim=model4$sims.list$w
npost=nrow(coord.estim)

res=matrix(NA,npost,51*51)
for (i in 1:npost){
  #get coordinates just for individuals that exist
  w=w.estim[i,]
  coord=coord.estim[i,,]
  ind=which(w==1)
  coord1=coord[ind,]
  coord2=data.frame(x=coord1[,1],y=coord1[,2])
  
  #2d kernel
  f1 <- kde2d(coord2$x, coord2$y, n = 51,
              lims=c(0,100,0,100)); #image(f1)
  f2=data.frame(x=rep(f1$x,51),
                y=rep(f1$y,each=51),
                z=matrix(f1$z,51*51,1))
  res[i,]=f2$z
}

#calculate posterior mean of 2D density
res1=data.frame(x=f2$x,y=f2$y,
                z=colMeans(res))
max1=max(res1$z)

ggplot(data=res1,aes(x=x,y=y,fill=z))+
  geom_tile()+
  theme(legend.position="none")+
  scale_fill_gradient2(low = "cyan", mid = "red",high='purple',
                       limits=c(0,max1),midpoint=0.5*max1)

Let’s compare these results to the true 2D density.

#compare to true density
f1 <- kde2d(coord.acenter$x, coord.acenter$y, n = 51,
            lims=c(0,100,0,100)); #image(f1)
f2=data.frame(x=rep(f1$x,51),
              y=rep(f1$y,each=51),
              z=matrix(f1$z,51*51,1))
max1=max(f2$z)
ggplot(data=f2,aes(x=x,y=y,fill=z))+
  geom_tile()+
  theme(legend.position="none")+
  scale_fill_gradient2(low = "cyan", mid = "red",high='purple',
                       limits=c(0,max1),midpoint=0.5*max1)



Comments?

Send me an email at