-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path4_gatherOutputs.R
98 lines (77 loc) · 3.33 KB
/
4_gatherOutputs.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#This script reads all the outputs from the force-alignments and prepares
#new columns for storing acoustic information
#Author: Simon Gonzalez - [email protected]
#loads libraries
library(rPraat)
library(dplyr)
library(stringr)
library(ggplot2)
library(plotly)
library(readr)
source('./functions/findSegments.R')
source('./functions/findTranscription.R')
source('./functions/findWords.R')
source('./functions/get_name_parameter.R')
fls <- list.files('./output', full.names = T, pattern = '.TextGrid')
cntr = 1
for(i in fls){
print(paste0(cntr, '/', length(fls)))
tg <- tg.read(i, encoding = as.character(guess_encoding(i)$encoding[1]))
fullname <- gsub('.TextGrid', '', basename(i))
namesSplit <- unlist(strsplit(fullname, '_'))
tmpCorpora <- namesSplit[1]
tmpName <- namesSplit[2]
tmpSegsLocs <- which(!tg[[2]]$label %in% c('', 'sp', 'sil'))
tmpSegs <- tg[[2]]$label[tmpSegsLocs]
tmpSegsPrev <- tg[[2]]$label[tmpSegsLocs-1]
tmpSegsFoll <- tg[[2]]$label[tmpSegsLocs+1]
tmpSegsOnset <- tg[[2]]$t1[tmpSegsLocs]
tmpSegsOffset <- tg[[2]]$t2[tmpSegsLocs]
tmpSegsDur <- tmpSegsOffset - tmpSegsOnset
segmentTierLabel <- names(tg)[2]
transcriptTierLabel <- names(tg)[1]
#get words
wordlabel = NULL
wordOnset = NULL
wordOffset = NULL
wordMid = NULL
wordDur = NULL
wordLoc <- NULL
for(j in 1:length(tmpSegsLocs)){
wordlabel[j] = findWords(data = tg, segNumber = tmpSegsLocs[j], tierLabel = segmentTierLabel, wordTier = transcriptTierLabel)[[1]]
wordOnset[j] = findWords(data = tg, segNumber = tmpSegsLocs[j], tierLabel = segmentTierLabel, wordTier = transcriptTierLabel)[[2]]
wordOffset[j] = findWords(data = tg, segNumber = tmpSegsLocs[j], tierLabel = segmentTierLabel, wordTier = transcriptTierLabel)[[3]]
wordMid[j] = findWords(data = tg, segNumber = tmpSegsLocs[j], tierLabel = segmentTierLabel, wordTier = transcriptTierLabel)[[4]]
wordDur[j] = findWords(data = tg, segNumber = tmpSegsLocs[j], tierLabel = segmentTierLabel, wordTier = transcriptTierLabel)[[5]]
wordLoc[j] = findWords(data = tg, segNumber = tmpSegsLocs[j], tierLabel = segmentTierLabel, wordTier = transcriptTierLabel)[[6]]
}
#get previous words
prevWords <- tg[[transcriptTierLabel]]$label[wordLoc-1]
#get following words
follWords <- tg[[transcriptTierLabel]]$label[wordLoc+1]
tmpdf <- data.frame(fullname, corpus = tmpCorpora, speaker = tmpName,
segment = tmpSegs, onset = tmpSegsOnset, offset = tmpSegsOffset, dur = tmpSegsDur,
word = wordlabel, wordOnset, wordOffset, wordDur,
previous = tmpSegsPrev, following = tmpSegsFoll)
if(i == fls[1]){
df <- tmpdf
}else{
df <- rbind(df, tmpdf)
}
write.csv(df, 'dfoutputs.csv', row.names = F)
cntr = cntr + 1
}
#identify the position of the segment in the word
df$position <- ifelse(df$onset == df$wordOnset, 'initial', 'medial')
df[df$offset == df$wordOffset, 'position'] <- 'final'
df[df$onset == df$wordOnset & df$offset == df$wordOffset, 'position'] <- 'both'
#creates duration and prepares the file to store acoustic information
df$mid <- df$onset + ((df$offset - df$onset)/2)
df$wordMid <- df$wordOnset + ((df$wordOffset - df$wordOnset)/2)
df$F1 <- 0
df$F2 <- 0
df$F3 <- 0
df$pitch <- 0
df$intensity <- 0
#saves data
write.csv(df, 'dfoutputs.csv', row.names = F, quote = F)