-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathIBM_full_Likelihood.R
307 lines (246 loc) · 11.3 KB
/
IBM_full_Likelihood.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
##
# Date created: September 25th, 2013
# Authors: Marie Auger-Methe, Ulrike Schlaegel, Craig DeMars
# Please cite our paper if you use our script:
# DeMars, C., M. Auger-Methe, U. Schlaegel, S. Boutin, (Published online)
# Inferring Parturition and Neonate Survival from Movement Patterns of Female Ungulates.
# Ecology and Evolution. DOI: 10.1002/ece3.785
# For an in-depth explanation of the code see:
# Appendix S2. Likelihood Functions Used in the Individual-based Method.
# Appendix S3. R Code for the Individual-based Method.
# from the supporting information:
# ece3785-sup-0001-AppendixS1-S4.docx
# The main difference between this code and the one presented in IBM.R and the appendix S3
# is this function calculated the full likelihood instead of the approximation.
# Because, in this case, we are numerically estimating all of the parameters,
# this code will take longer to run.
# fullnll1 now replaces nllk and nll1 now optimizess fullnll1 instead of nllk.
# This mean that now both lambda and k are estimated simultaneously numerically.
# Now we have fullnll2 and changed nll2 so it optimizes simultaneously lambda and k.
# Also, nll1 and nll2 returns now both NLL value and the MLE values.
# In the function mnll3M we now have:
# columns for BIC values in resCA
# small changes in sections "Calf survived" and "Calf lost"
# (MLE values return from function nll1 and nll2 used
# instead of optimizing again for best BP)
# calculate BIC values and get model with min BIC
#################################################################
# Functions that calculate the negative log likelihood (nll) of 2 of the 3 models
# and the nll of k, one of the parameter used to calculate the nll of these models.
# Note that all models assume
# that the step length are distributed with an exponential distribution.
# They differ in how the main parameter of the exponential distribution
# varies through the time series.
# nll1 calculates the nll of a female with a calf that survived.
# This model assumes that the mean step length is constant prior to the birth of a calf.
# Once the calf is born the mean step length decrease to 1/(l_a * k),
# which should be really close to 0.
# The mean step length then linearly increases
# to reach the same mean step length as prior to calf birth.
# The speed of the increase depends on the time it takes to recover the normal movement,
# which is represented by k.
# Once the mean step length reaches the value from before the birth of the calf,
# it remains at this value.
# The parameters to estimate are:
# 1 BP, l_a, and k
# fullnll1 evaluates the negative log likelihood at parameter values (lambda, k) under model 1
fullnll1 <- function(param, SL_a, SL_b, ti_b){
# param is the vector of parameters of interest
l_a <- param[1]
k <- param[2]
# likelihood part t=1,...,BP
LL_a <- dexp(SL_a, l_a, log=T)
# likelihood part t=BP+1,...,T
z <- 1/l_a
z_b <- (ti_b-ti_b[1]+1)*z/k
z_b[z_b > z ] <- z
LL_k <- dexp(SL_b, 1/z_b, log=T)
# sum all negative log likelihoods from all parts
return(-sum(LL_a) - sum(LL_k))
}
# nll1 minimizes fullnll1 over possible values for (lambda, k)
# This results in the maximum likelihood estimates for (lambda, K)
nll1 <- function(BP, SL, ti, kc){
n <- length(SL)
###
# Divides the time series into two sections:
# a: before the birth of the calf
# b: after the birth of the calf
SL_a <- SL[1:BP]
s_b <- (BP+1):n
SL_b <- SL[s_b]
ti_b <- ti[s_b]
# Numerically estimate the MLE of lambda and k
# and obtain the corresponding negative log-likelihood values
# start values for optim are:
# for lambda: the mean step length for t=1,...,BP
# for k: 4 weeks (equivalent thereof in steps)
startvalues <- c(length(SL_a)/sum(SL_a), 4*24/(dint*subs)*7)
mnll_b <- optim(startvalues, fullnll1, method="L-BFGS-B", SL_a=SL_a, SL_b=SL_b, ti_b=ti_b, lower=c(0.0005, kc[1]), upper=c(1, kc[2]))
###
# Getting the total nll for the whole time series
nLL <- mnll_b$value
l_a <- mnll_b$par[1]
k <- mnll_b$par[2]
return(list("nLL"=nLL, "l_a"=l_a, "k"=k))
}
# nll2 calculates the nll of a female with a calf that died.
# This model assumes that the mean step length is constant prior to the birth of a calf.
# Once the calf is born the mean step length decrease to 1/(l_a * k),
# which should be really close to 0.
# The mean step length then linearly increases
# to reach the same mean step length as prior to calf birth.
# The speed of the increase depends on the time it takes to recover the normal movement,
# which is represented by k.
# Once the mean step length reaches the value from before the birth of the calf,
# it remains at this value.
# The k represent the time it takes to recover the normal movement if the calf survives.
# The previous assumptions are the same as for nll1.
# This model further assumes
# that the female immediately recovers her normal movement when the calf dies.
# This abrupt switch is represented by a second BP.
# The parameters to estimate are:
# 2 BPs, l_a, and k
# fullnll2 evaluates the negative log likelihood at parameter values (lambda, k) under model 2
fullnll2 <- function(param, SL_a, SL_b, SL_c, ti_b){
# param is the vector of parameters of interest
l_a <- param[1]
k <- param[2]
# likelihood part t=1,...,BP1
LL_a <- dexp(SL_a, l_a, log=T)
# likelihood part t=BP1+1,...,BP2
z <- 1/l_a
z_b <- (ti_b-ti_b[1]+1)*z/k
z_b[z_b > z ] <- z
LL_k <- dexp(SL_b, 1/z_b, log=T)
# likelihood part t=BP2+1,...,T
LL_c <- dexp(SL_c, l_a, log=T)
# sum all negative log likelihoods from all parts
return(-sum(LL_a) - sum(LL_k) - sum(LL_c))
}
# nll2 minimizes fullnll2 over possible values for (lambda, k)
# This results in the maximum likelihood estimates for (lambda, K)
nll2 <- function(BP, SL, ti, kc){
n <- length(SL)
# Divides the time series into three sections:
# a: before the birth of the calf
# b: after the birth of the calf but before it dies
# c: after the death of the calf
SL_a <- SL[1:BP[1]]
s_b <- (BP[1]+1):BP[2]
SL_b <- SL[s_b]
ti_b <- ti[s_b]
SL_c <- SL[(BP[2]+1):n]
# Numerically estimate the MLE of lambda and k
# and obtain the corresponding negative log-likelihood values
# start values for optim are:
# for lambda: the mean step length for t=1,...,BP
# for k: 4 weeks (equivalent thereof in steps)
startvalues <- c(length(SL_a)/sum(SL_a), 4*24/(dint*subs)*7)
mnll_b <- optim(startvalues, fullnll2, method="L-BFGS-B", SL_a=SL_a,
SL_b=SL_b, SL_c=SL_c, ti_b=ti_b,
lower=c(0.0005, kc[1]), upper=c(1, kc[2]))
# Getting the total nll for the whole time series
nLL <- mnll_b$value
l_a <- mnll_b$par[1]
k <- mnll_b$par[2]
return(list("nLL"=nLL, "l_a"=l_a, "k"=k))
}
# Function minimise the likelihood of the 3 Models
mnll3M <- function(movF, int, kcons){
# movF: list with at least 3 elements
# 1. SL: numeric vector that contains the step lengths
# measured at regular time intervals.
# NAs are not allowed.
# 2. ti: integer vector that identifies
# the time of each step length of SL.
# It is the index of the times found in tp.
# ti and SL should be of exactly the same length.
# NAs are not allowed.
# 3. tp: POSIXct vector that identifies the real date and time of the SL.
# The missing steps should be represented with NAs.
# For example if you have locations
# (0,0) (0,1) (0,3) (1,3) (7,3)
# taken at:
# 01:00, 02:00, 03:00, 08:00, 09:00
# SL = (1,2,6)
# ti = (1,2,4)
# tp = (01:00, 02:00, NA, 08:00, NA)
# (Although excluded for clarity, tp should include the date)
# We recommend that the time series only included the time period
# relevant to the birth and death of the calf.
# We only included movement from xx to xx.
# int: integer value indicating the minimum number of steps needed between
# the beginning of the time series and the first BP (birth of calf),
# the last BP (death of calf) and the end of the time series,
# and between the two BPs.
# This is required in part because you need some steps
# in most sections of the time series to estimate parameters.
# kcons: numeric vector with two values that contains
# the minimum and maximum values c(kmin,kmax) for parameter k.
# k represents the time it takes for the female with a calf that survives
# to recover her normal movement.
# We contrained our values to be the equivalent in steps of 3 and 6 weeks.
# kcons is required as it is needed by the function optimize()
# but to place no informed restriction,
# you can make kcons <- c(0,vlv), where vlv is a very large value
# Results for model comparison
resCA <- matrix(NA, 1, ncol=12)
colnames(resCA) <- c("n", "mnll_0","mnll_1","mnll_2",
"AIC0", "AIC1", "AIC2", "BM",
"BIC0", "BIC1", "BIC2", "BM_BIC")
# BPs index and actual date and time
BPs <- data.frame(matrix(NA, 1, ncol=6))
colnames(BPs) <- c("BP1c", "BP2c", "BP2l", "iBP1c", "iBP2c", "iBP2l")
# Parameters (other than the BP) of each model
mpar <- matrix(NA, 1, 5)
colnames(mpar) <- c('b0','b1','b2','k1', 'k2')
# Sample size
resCA[1] <- length(movF$SL)
##
# M0: No calf
# This model assumes that the movement pattern is constant for the whole time series
# and thus the mean step length (b0) is constant.
# The only parameter estimated is b0, which is the inverse of the rate.
# It has a analytical solution and
# thus we can easily get the minimum negative log likelihood (mnll)
mpar[1] <- sum(movF$SL)/resCA[1] #b0
resCA[2] <- -sum(dexp(movF$SL,1/mpar[1],log=T)) #mnll0
##
# Calf survived
BP1ser <- int:(resCA[1]-int) # Serie of all possible BPs
temp_1 <- lapply(BP1ser, nll1, SL=movF$SL, ti=movF$ti, kc=kcons)
NLL_1 <- as.vector(unlist(temp_1)[seq(1,length(unlist(temp_1)),3)])
MNLL_1_i <- which.min(NLL_1)
resCA[3] <- NLL_1[MNLL_1_i] # mnll_1
BPs[4] <- BP1ser[MNLL_1_i]
BPs[1] <- as.character(movF$tp[movF$ti[BPs[[4]]]]) #BP1c
mpar[2] <- 1/temp_1[[MNLL_1_i]]$l_a #b1
mpar[4] <- temp_1[[MNLL_1_i]]$k
##
# Calf lost
# Getting all possible combination of BPs
BP2ser <- combn(int:(resCA[1]-int),2)
BP2ser <- BP2ser[,diff(BP2ser) >= int]
BP2ser <- split(t(BP2ser),1:ncol(BP2ser))
# Applying the model 2 on all possible BPs
temp_2 <- lapply(BP2ser,nll2,SL=movF$SL, ti=movF$ti, kc=kcons)
NLL_2 <- as.vector(unlist(temp_2)[seq(1,length(unlist(temp_2)),3)])
MNLL_2_i <- which.min(NLL_2)
resCA[4] <- NLL_2[MNLL_2_i] # mnll_2
BPs[5:6] <- BP2ser[[MNLL_2_i]]
BPs[2] <- as.character(movF$tp[movF$ti[BPs[[5]]]]) #BP2c
BPs[3] <- as.character(movF$tp[movF$ti[BPs[[6]]]]) #BP2l
mpar[3] <- 1/temp_2[[MNLL_2_i]]$l_a #b2
mpar[5] <- temp_2[[MNLL_1_i]]$k
# Calculate AIC and compare models
resCA[5] <- 2*(resCA[2] + 1) # Only lambda to estimate
resCA[6] <- 2*(resCA[3] + 3) # lambda, k and BP
resCA[7] <- 2*(resCA[4] + 4) # lambda, k and 2*BPs
resCA[8] <- which.min(resCA[,5:7])-1
resCA[9] <- 2*resCA[2] + 1*log(resCA[1])
resCA[10] <- 2*resCA[3] + 3*log(resCA[1])
resCA[11] <- 2*resCA[4] + 4*log(resCA[1])
resCA[12] <- which.min(resCA[,9:11])-1
return(list(resCA=resCA,BPs=BPs,mpar=mpar))
}