-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgetMarkov.R
65 lines (50 loc) · 1.95 KB
/
getMarkov.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# This script maps the pipeline to each session in a dataset
source('utils.R')
## set options
# The two paths are placeholders: replace each string with a real path before
# running the script.
INPUT_PATH <- '<path to your dataset>'
OUTPUT_PATH <- '<path to save your processed data>'
SESSION_ID <- 'session' # column name for session ID
TOPIC <- 'REC' # focus on code REC
CONCAT <- FALSE # consider therapist codes that co-occur or not
STARTWITH <- 'P' # consider only transition from client to therapist
# STARTWITH determines the direction of condition, e.g., STARTWITH=P means looking
# at P -> T sequences.
## read data
# Read the dataset from the path configured in INPUT_PATH.
# NOTE(review): fread comes from data.table, presumably attached by utils.R -
# confirm.
dt <- fread(INPUT_PATH)
## initiate empty table for transition sequences
# `dic` holds one row per unique session ID, one numeric column per transition
# sequence name (initialised to 0), and a count column N (initialised to 0).
session <- unique(dt[, SESSION_ID, with = FALSE])
# Copy so that adding columns by reference to `dic` leaves `session` unchanged.
dic <- copy(session)
# NOTE(review): createDic is presumably defined in utils.R and is assumed to
# return a character vector of unique column names - confirm.
seqNames <- createDic(dt, TOPIC, STARTWITH, CONCAT)
if (length(seqNames) > 0) {
  # Create the sequence columns directly as zeros; the session ID column is
  # left untouched.
  dic[, (seqNames) := 0]
}
dic[, N := 0]
## create a function that calculates and assigns transition probabilities to the empty table above
# Compute the transition proportions for one session and add them, by
# reference, to that session's row of `dictionary`.
#
# Arguments:
#   DT         data.table holding the rows of a single session; must contain
#              the column named by the global SESSION_ID.
#   dictionary data.table with a SESSION_ID column, one numeric column per
#              sequence name and a column N; modified in place.
#   startWith  passed through to countSequence().
#   topic      passed through to focusTopic().
#   concat     passed through to countSequence().
#
# Returns `dictionary` invisibly. Stops if DT does not contain exactly one
# distinct session ID.
#
# NOTE(review): countSequence() and focusTopic() are presumably defined in
# utils.R; the result of focusTopic() is assumed to be a named numeric
# vector/table whose names are columns of `dictionary` - confirm.
assignProb <- function(DT, dictionary, startWith, topic, concat) {
  # Extract the ID as a plain scalar so the row match below compares values
  # rather than comparing a column against a one-column table.
  sessionNum <- unique(DT[[SESSION_ID]])
  if (length(sessionNum) != 1L) {
    stop("assignProb() expects DT to contain exactly one session ID, found ",
         length(sessionNum), call. = FALSE)
  }

  input <- DT[, .SD, .SDcols = !SESSION_ID]
  freq <- countSequence(input, startWith, concat)
  freq <- focusTopic(freq, topic)
  totalFreq <- sum(freq)

  # Locate the session's row(s) once instead of re-filtering per sequence.
  rowIdx <- which(dictionary[[SESSION_ID]] == sessionNum)

  # Proportions are only defined when at least one sequence was counted.
  if (totalFreq != 0) {
    props <- prop.table(freq)
    for (i in seq_along(props)) {
      seqName <- names(props)[i]
      oriValue <- dictionary[[seqName]][rowIdx]
      dictionary[rowIdx, (seqName) := oriValue + as.numeric(props[i])]
    }
  }

  dictionary[rowIdx, N := totalFreq]
  invisible(dictionary)
}
## fill out the empty table
# Loop over the session ID values themselves (the column vector, not the
# one-column table). Indexing into the vector keeps the ID's class intact for
# the comparison below.
sessionIds <- unique(dt[[SESSION_ID]])
# Rows with a missing session ID cannot be matched to a session; skip them.
sessionIds <- sessionIds[!is.na(sessionIds)]
for(k in seq_along(sessionIds)) {
  i <- sessionIds[k]
  sel <- dtPreprocess(dt[get(SESSION_ID)==i])
  # assignProb updates `dic` by reference, so no reassignment is needed.
  assignProb(sel, dic, STARTWITH, TOPIC, CONCAT)
}
## attach the per-session transition columns to every row of the original data
## (join on the session ID column; one output row per row of `dt`)
dt <- dic[dt, on = SESSION_ID]
## write the combined table to OUTPUT_PATH
fwrite(x = dt, file = OUTPUT_PATH)