Related: Check memoryless property of a Markov chain
I try to expand the verifyMarkovProperty function of the markovchain package to assess the Markov Property of transition matrices by assigning each unique realised state in the sequence of matrices an ID and testing the sequence of these IDs. However, I am not sure if my approach does make sense statistically.
My question is: Is the method I use to test the Markov Property of a sequence of matrices theoretically valid? If so, would it also work for a sequence of matrices where the unique realised states only occur once or at maximum a few times (because of a large amount of possible states compared to a short observation period)?
To see if this works I wrote a short script in which I use the verifyMarkovProperty function in four different scenarios:
- sequence of single states depending only on t
- sequence of single states depending on t and t-1
- sequence of multiple states depending only on t
- sequence of multiple states depending on t and t-1
For simplicity I use only two possible states (a and b) and my matrices for scenarios 3 and 4 only consist of two elements. For each transition combination I report the test statistic, p-value and whether the null hypothesis of the Markov Property holding should be rejected on the 95 and 99 confidence level.
Now, using the below code, I get the expected results for scenarios 1 and 2, with only a few runs of scenario 1 yielding false-positive results. Running scenarios 3 and 4, I get a feq more false-positives for scenario 3 which worries me a bit. Furthermore, for scenario 4 I get quite a few false-negatives, even though the generation process clearly does not comply to the Markov Property.
Load packages:
require(markovchain)
require(data.table)
Scenario 1:
#Scenario 1
#Markov Property OK (depends only on t):
#Sequence generation: if t="a" use pA for sampling, if t="b" use pB for sampling
pA <- c(0.7, 0.3)
pB <- c(0.5, 0.5)
sequence <- sample(c("a","b"), 1)
for (i in 1:1000) {
if (sequence[length(sequence)] == "a") {
sequence <- rbind(sequence, sample(c("a","b"), 1, p=pA))
}
if (sequence[length(sequence)] == "b") {
sequence <- rbind(sequence, sample(c("a","b"), 1, p=pB))
}
}
verifyMarkovProperty(sequence)
markovTest <- verifyMarkovProperty(sequence)
testTable <- data.table(chisq=0, pval=0, state="init", reject95=FALSE, reject99=FALSE)
for (i in 1:length(markovTest)) {
testTable <- rbind(testTable, data.table(chisq=markovTest[[i]][[1]], pval=markovTest[[i]][[3]], state=names(markovTest[i]), reject95=markovTest[[i]][[3]]<0.05, reject99=markovTest[[i]][[3]]<0.01))
}
testTable
states
Scenario 2:
#Scenario 2
#Markov Property not OK (depends on t and t-1):
#Sequence generation: if t="a" and t-1="a" use pA for sampling, if t="b" and t-1="b" use pB for sampling
pA <- c(0.7, 0.3)
pB <- c(0.5, 0.5)
sequence <- sample(c("a","b"), 1)
sequence <- rbind(sequence, sample(c("a","b"), 1))
for (i in 1:1000) {
if (sequence[length(sequence)-1] == "a" & sequence[length(sequence)] == "a") {
sequence <- rbind(sequence, sample(c("a","b"), 1, p=pA))
}
else if (sequence[length(sequence)-1] == "b" & sequence[length(sequence)] == "b") {
sequence <- rbind(sequence, sample(c("a","b"), 1, p=pB))
}
else sequence <- rbind(sequence, sample(c("a","b"), 1))
}
verifyMarkovProperty(sequence)
markovTest <- verifyMarkovProperty(sequence)
testTable <- data.table(chisq=0, pval=0, state="init", reject95=FALSE, reject99=FALSE)
for (i in 1:length(markovTest)) {
testTable <- rbind(testTable, data.table(chisq=markovTest[[i]][[1]], pval=markovTest[[i]][[3]], state=names(markovTest[i]), reject95=markovTest[[i]][[3]]<0.05, reject99=markovTest[[i]][[3]]<0.01))
}
testTable
states
Scenario 3:
#Scenario 3
#Markov Property OK (depends only on t):
#Sequence generation: if t="a,a" use pA, if t="b,b" use pB, else use (0.5, 0.5)
pA <- c(0.7, 0.3)
pB <- c(0.5, 0.5)
sequence <- data.table(v1=sample(c("a","b"), 1), v2=sample(c("a","b"), 1))
for (i in 1:1000) {
if (sequence[length(sequence[,v1]),v1] == "a" & sequence[length(sequence[,v2]),v2] == "a") {
sequence <- rbind(sequence, data.table(v1=sample(c("a","b"), 1, p=pA), v2=sample(c("a","b"), 1, p=pA)))
}
else if (sequence[length(sequence[,v1]),v1] == "b" & sequence[length(sequence[,v2]),v2] == "b") {
sequence <- rbind(sequence, data.table(v1=sample(c("a","b"), 1, p=pB), v2=sample(c("a","b"), 1, p=pB)))
}
else sequence <- rbind(sequence, data.table(v1=sample(c("a","b"), 1), v2=sample(c("a","b"), 1)))
}
sequence <- cbind(id=seq(1:nrow(sequence)), sequence)
states <- cbind(unique(sequence[,list(v1,v2)]), state=seq(1:nrow(unique(sequence[,list(v1,v2)]))))
setkey(sequence, v1, v2)
setkey(states, v1, v2)
sequence <- sequence[states][order(id)]
testSequence <- sequence[,state]
markovTest <- verifyMarkovProperty(testSequence)
testTable <- data.table(chisq=0, pval=0, state="init", reject95=FALSE, reject99=FALSE)
for (i in 1:length(markovTest)) {
testTable <- rbind(testTable, data.table(chisq=markovTest[[i]][[1]], pval=markovTest[[i]][[3]], state=names(markovTest[i]), reject95=markovTest[[i]][[3]]<0.05, reject99=markovTest[[i]][[3]]<0.01))
}
testTable
states
Scenario 4:
#Scenario 4
#Markov Property not OK (depends on t and t-1):
#Sequence generation: if t="a,a" and t-1="a,a" use pA, if t="b,b" and t-1="b,b" use pB, else use (0.5, 0.5)
pA <- c(0.7, 0.3)
pB <- c(0.5, 0.5)
sequence <- data.table(v1=sample(c("a","b"), 1), v2=sample(c("a","b"), 1))
sequence <- rbind(sequence, data.table(v1=sample(c("a","b"), 1), v2=sample(c("a","b"), 1)))
for (i in 1:1000) {
if ((sequence[length(sequence[,v1]),v1] == "a" & sequence[length(sequence[,v2]),v2] == "a") & (sequence[length(sequence[,v1])-1,v1] == "a" & sequence[length(sequence[,v2])-1,v2] == "a")) {
sequence <- rbind(sequence, data.table(v1=sample(c("a","b"), 1, p=pA), v2=sample(c("a","b"), 1, p=pA)))
}
else if ((sequence[length(sequence[,v1]),v1] == "b" & sequence[length(sequence[,v2]),v2] == "b") & (sequence[length(sequence[,v1])-1,v1] == "b" & sequence[length(sequence[,v2])-1,v2] == "b")) {
sequence <- rbind(sequence, data.table(v1=sample(c("a","b"), 1, p=pB), v2=sample(c("a","b"), 1, p=pB)))
}
else sequence <- rbind(sequence, data.table(v1=sample(c("a","b"), 1), v2=sample(c("a","b"), 1)))
}
sequence <- cbind(id=seq(1:nrow(sequence)), sequence)
states <- cbind(unique(sequence[,list(v1,v2)]), state=seq(1:nrow(unique(sequence[,list(v1,v2)]))))
setkey(sequence, v1, v2)
setkey(states, v1, v2)
sequence <- sequence[states][order(id)]
testSequence <- sequence[,state]
verifyMarkovProperty(testSequence)
markovTest <- verifyMarkovProperty(testSequence)
testTable <- data.table(chisq=0, pval=0, state="init", reject95=FALSE, reject99=FALSE)
for (i in 1:length(markovTest)) {
testTable <- rbind(testTable, data.table(chisq=markovTest[[i]][[1]], pval=markovTest[[i]][[3]], state=names(markovTest[i]), reject95=markovTest[[i]][[3]]<0.05, reject99=markovTest[[i]][[3]]<0.01))
}
testTable
states